Update contrib/libs/apache/orc to 2.0.0

28031d32eb02ad8a790abc416b7db3264738c474
author: thegeorg <thegeorg@yandex-team.com> 2024-03-17 04:47:32 +0300
committer: thegeorg <thegeorg@yandex-team.com> 2024-03-17 04:57:12 +0300
commit: 0816a937aebb4bb8ff5d68730c625cb1c99c9b4b (patch)
tree: 45dd2b2d18017590838384a1a7687279ac280444 /contrib/libs/apache/orc
parent: 6d5eb3aff8e43031b7dcb8be42d649799cd8a6c3 (diff)
download: ydb-0816a937aebb4bb8ff5d68730c625cb1c99c9b4b.tar.gz
94 files changed, 10066 insertions, 8196 deletions
diff --git a/contrib/libs/apache/orc/README.md b/contrib/libs/apache/orc/README.md
index a7d959247e..60b0da5fcb 100644
--- a/contrib/libs/apache/orc/README.md
+++ b/contrib/libs/apache/orc/README.md
@@ -18,9 +18,9 @@ lists, maps, and unions.
 This project includes both a Java library and a C++ library for reading and writing the _Optimized Row Columnar_ (ORC) file format. The C++ and Java libraries are completely independent of each other and will each read all versions of ORC files.
 
 Releases:
-* Latest: <a href="http://orc.apache.org/releases">Apache ORC releases</a>
-* Maven Central: <a href="http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.orc%22">![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.orc/orc/badge.svg)</a>
-* Downloads: <a href="http://orc.apache.org/downloads">Apache ORC downloads</a>
+* Latest: <a href="https://orc.apache.org/releases">Apache ORC releases</a>
+* Maven Central: <a href="https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.orc%22">![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.orc/orc/badge.svg)</a>
+* Downloads: <a href="https://orc.apache.org/downloads">Apache ORC downloads</a>
 * Release tags: <a href="https://github.com/apache/orc/releases">Apache ORC release tags</a>
 * Plan: <a href="https://github.com/apache/orc/milestones">Apache ORC future release plan</a>
 
@@ -28,7 +28,7 @@ The current build status:
 * Main branch <a href="https://github.com/apache/orc/actions/workflows/build_and_test.yml?query=branch%3Amain">
 ![main build status](https://github.com/apache/orc/actions/workflows/build_and_test.yml/badge.svg?branch=main)</a>
 
-Bug tracking: <a href="http://orc.apache.org/bugs">Apache Jira</a>
+Bug tracking: <a href="https://orc.apache.org/bugs">Apache Jira</a>
 
 
 The subdirectories are:
@@ -37,15 +37,14 @@ The subdirectories are:
 * docker - docker scripts to build and test on various linuxes
 * examples - various ORC example files that are used to test compatibility
 * java - the java reader and writer
-* proto - the protocol buffer definition for the ORC metadata
 * site - the website and documentation
 * tools - the c++ tools for reading and inspecting ORC files
 
 ### Building
 
-* Install java 1.8 or higher
-* Install maven 3.8.6 or higher
-* Install cmake
+* Install java 17 or higher
+* Install maven 3.9.6 or higher
+* Install cmake 3.12 or higher
 
 To build a release version with debug information:
 ```shell
@@ -93,3 +92,18 @@ To build only the C++ library:
 % make test-out
 
 ```
+
+To build the C++ library with AVX512 enabled:
+```shell
+export ORC_USER_SIMD_LEVEL=AVX512
+% mkdir build
+% cd build
+% cmake .. -DBUILD_JAVA=OFF -DBUILD_ENABLE_AVX512=ON
+% make package
+% make test-out
+```
+The CMake option BUILD_ENABLE_AVX512 can be set to "ON" or "OFF" (the default) at compile time. It defines the SIMD level (AVX512) to be compiled into the binaries.
+
+The environment variable ORC_USER_SIMD_LEVEL can be set to "AVX512" or "NONE" (the default) at run time. It defines the SIMD level used to dispatch code that can apply SIMD optimization.
+
+Note that if ORC_USER_SIMD_LEVEL is set to "NONE" at run time, AVX512 will not take effect even if BUILD_ENABLE_AVX512 was set to "ON" at compile time.
diff --git a/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh b/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh
index 91277392c7..d08f6deac7 100644
--- a/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh
@@ -27,11 +27,11 @@
 namespace orc {
 
   class BloomFilter {
-  public:
+   public:
     virtual ~BloomFilter();
 
     // test if the element exists in BloomFilter
-    virtual bool testBytes(const char * data, int64_t length) const = 0;
+    virtual bool testBytes(const char* data, int64_t length) const = 0;
     virtual bool testLong(int64_t data) const = 0;
     virtual bool testDouble(double data) const = 0;
   };
@@ -40,6 +40,6 @@ namespace orc {
     std::vector<std::shared_ptr<BloomFilter>> entries;
   };
 
-}
+}  // namespace orc
 
-#endif //ORC_BLOOMFILTER_HH
+#endif  // ORC_BLOOMFILTER_HH
diff --git a/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh b/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh
index aa19214738..328c0e84b6 100644
--- a/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh
@@ -19,12 +19,11 @@
 #ifndef ORC_COLUMN_PRINTER_HH
 #define ORC_COLUMN_PRINTER_HH
 
-#include "orc/orc-config.hh"
 #include "orc/OrcFile.hh"
 #include "orc/Vector.hh"
+#include "orc/orc-config.hh"
 
 #include <stdio.h>
-#include <string>
 #include <memory>
 #include <string>
 #include <vector>
@@ -32,12 +31,12 @@
 namespace orc {
 
   class ColumnPrinter {
-  protected:
-    std::string &buffer;
-    bool hasNulls ;
+   protected:
+    std::string& buffer;
+    bool hasNulls;
     const char* notNull;
 
-  public:
+   public:
     ColumnPrinter(std::string&);
     virtual ~ColumnPrinter();
     virtual void printRow(uint64_t rowId) = 0;
@@ -45,7 +44,6 @@ namespace orc {
     virtual void reset(const ColumnVectorBatch& batch);
   };
 
-  ORC_UNIQUE_PTR<ColumnPrinter> createColumnPrinter(std::string&,
-                                                    const Type* type);
-}
+  std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string&, const Type* type);
+}  // namespace orc
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Common.hh b/contrib/libs/apache/orc/c++/include/orc/Common.hh
index e51e37e710..9da67a3f19 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Common.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Common.hh
@@ -19,47 +19,45 @@
 #ifndef ORC_COMMON_HH
 #define ORC_COMMON_HH
 
-#include "orc/Vector.hh"
-#include "orc/Type.hh"
 #include "orc/Exceptions.hh"
+#include "orc/Type.hh"
+#include "orc/Vector.hh"
 
 #include <string>
 
 namespace orc {
 
   class FileVersion {
-  private:
+   private:
     uint32_t majorVersion;
     uint32_t minorVersion;
-  public:
+
+   public:
     static const FileVersion& v_0_11();
     static const FileVersion& v_0_12();
     static const FileVersion& UNSTABLE_PRE_2_0();
 
-    FileVersion(uint32_t major, uint32_t minor) :
-                majorVersion(major), minorVersion(minor) {
-    }
+    FileVersion(uint32_t major, uint32_t minor) : majorVersion(major), minorVersion(minor) {}
 
     /**
      * Get major version
      */
     uint32_t getMajor() const {
-        return this->majorVersion;
+      return this->majorVersion;
     }
 
     /**
      * Get minor version
      */
     uint32_t getMinor() const {
-        return this->minorVersion;
+      return this->minorVersion;
     }
 
-    bool operator == (const FileVersion & right) const {
-      return this->majorVersion == right.getMajor() &&
-              this->minorVersion == right.getMinor();
+    bool operator==(const FileVersion& right) const {
+      return this->majorVersion == right.getMajor() && this->minorVersion == right.getMinor();
     }
 
-    bool operator != (const FileVersion & right) const {
+    bool operator!=(const FileVersion& right) const {
       return !(*this == right);
     }
 
@@ -72,6 +70,7 @@ namespace orc {
     PRESTO_WRITER = 2,
     SCRITCHLEY_GO = 3,
     TRINO_WRITER = 4,
+    CUDF_WRITER = 5,
     UNKNOWN_WRITER = INT32_MAX
   };
 
@@ -140,7 +139,7 @@ namespace orc {
   std::string streamKindToString(StreamKind kind);
 
   class StreamInformation {
-  public:
+   public:
     virtual ~StreamInformation();
 
     virtual StreamKind getKind() const = 0;
@@ -159,7 +158,7 @@ namespace orc {
   std::string columnEncodingKindToString(ColumnEncodingKind kind);
 
   class StripeInformation {
-  public:
+   public:
     virtual ~StripeInformation();
 
     /**
@@ -184,7 +183,7 @@ namespace orc {
      * Get the length of the stripe's data.
      * @return the number of bytes in the stripe
      */
-    virtual uint64_t getDataLength()const = 0;
+    virtual uint64_t getDataLength() const = 0;
 
     /**
      * Get the length of the stripe's tail section, which contains its index.
@@ -206,8 +205,7 @@ namespace orc {
     /**
      * Get the StreamInformation for the given stream.
      */
-    virtual ORC_UNIQUE_PTR<StreamInformation>
-    getStreamInformation(uint64_t streamId) const = 0;
+    virtual std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId) const = 0;
 
     /**
      * Get the column encoding for the given column.
@@ -238,10 +236,8 @@ namespace orc {
   template <>
   inline bool compare(Decimal val1, Decimal val2) {
     // compare integral parts
-    Int128 integral1 = scaleDownInt128ByPowerOfTen(val1.value,
-                                                   val1.scale);
-    Int128 integral2 = scaleDownInt128ByPowerOfTen(val2.value,
-                                                   val2.scale);
+    Int128 integral1 = scaleDownInt128ByPowerOfTen(val1.value, val1.scale);
+    Int128 integral2 = scaleDownInt128ByPowerOfTen(val2.value, val2.scale);
 
     if (integral1 < integral2) {
       return true;
@@ -253,25 +249,17 @@ namespace orc {
     // unnecessary to check overflow here because the scaled number will not
     // exceed original ones
     bool overflow = false, positive = val1.value >= 0;
-    val1.value -= scaleUpInt128ByPowerOfTen(integral1,
-                                            val1.scale,
-                                            overflow);
-    val2.value -= scaleUpInt128ByPowerOfTen(integral2,
-                                            val2.scale,
-                                            overflow);
+    val1.value -= scaleUpInt128ByPowerOfTen(integral1, val1.scale, overflow);
+    val2.value -= scaleUpInt128ByPowerOfTen(integral2, val2.scale, overflow);
 
     int32_t diff = val1.scale - val2.scale;
     if (diff > 0) {
-      val2.value = scaleUpInt128ByPowerOfTen(val2.value,
-                                             diff,
-                                             overflow);
+      val2.value = scaleUpInt128ByPowerOfTen(val2.value, diff, overflow);
       if (overflow) {
         return positive ? true : false;
       }
     } else {
-      val1.value = scaleUpInt128ByPowerOfTen(val1.value,
-                                             -diff,
-                                             overflow);
+      val1.value = scaleUpInt128ByPowerOfTen(val1.value, -diff, overflow);
       if (overflow) {
         return positive ? false : true;
       }
@@ -317,6 +305,6 @@ namespace orc {
     return !(lhs != rhs);
   }
 
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh b/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh
index 9765d4fd6b..0536dbd164 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh
@@ -26,35 +26,47 @@
 
 namespace orc {
 
-  class NotImplementedYet: public std::logic_error {
-  public:
+  class NotImplementedYet : public std::logic_error {
+   public:
     explicit NotImplementedYet(const std::string& what_arg);
     explicit NotImplementedYet(const char* what_arg);
-    virtual ~NotImplementedYet() ORC_NOEXCEPT;
+    ~NotImplementedYet() noexcept override;
     NotImplementedYet(const NotImplementedYet&);
-  private:
+
+   private:
     NotImplementedYet& operator=(const NotImplementedYet&);
   };
 
-  class ParseError: public std::runtime_error {
-  public:
+  class ParseError : public std::runtime_error {
+   public:
     explicit ParseError(const std::string& what_arg);
     explicit ParseError(const char* what_arg);
-    virtual ~ParseError() ORC_NOEXCEPT;
+    ~ParseError() noexcept override;
     ParseError(const ParseError&);
-  private:
+
+   private:
     ParseError& operator=(const ParseError&);
   };
 
-  class InvalidArgument: public std::runtime_error {
-  public:
+  class InvalidArgument : public std::runtime_error {
+   public:
     explicit InvalidArgument(const std::string& what_arg);
     explicit InvalidArgument(const char* what_arg);
-    virtual ~InvalidArgument() ORC_NOEXCEPT;
+    ~InvalidArgument() noexcept override;
     InvalidArgument(const InvalidArgument&);
-  private:
+
+   private:
     InvalidArgument& operator=(const InvalidArgument&);
   };
-}
+
+  class SchemaEvolutionError : public std::logic_error {
+   public:
+    explicit SchemaEvolutionError(const std::string& what_arg);
+    explicit SchemaEvolutionError(const char* what_arg);
+    virtual ~SchemaEvolutionError() noexcept override;
+    SchemaEvolutionError(const SchemaEvolutionError&);
+    SchemaEvolutionError& operator=(const SchemaEvolutionError&) = delete;
+  };
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Int128.hh b/contrib/libs/apache/orc/c++/include/orc/Int128.hh
index 1f68b2b119..bcb4a58e22 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Int128.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Int128.hh
@@ -35,7 +35,7 @@ namespace orc {
    *
    */
   class Int128 {
-  public:
+   public:
     Int128() {
       highbits = 0;
       lowbits = 0;
@@ -110,7 +110,7 @@ namespace orc {
      * @param right the number to add
      * @return *this
      */
-    Int128& operator+=(const Int128 &right) {
+    Int128& operator+=(const Int128& right) {
       uint64_t sum = lowbits + right.lowbits;
       highbits += right.highbits;
       if (sum < lowbits) {
@@ -125,7 +125,7 @@ namespace orc {
      * @param right the number to subtract
      * @return *this
      */
-    Int128& operator-=(const Int128 &right) {
+    Int128& operator-=(const Int128& right) {
       uint64_t diff = lowbits - right.lowbits;
       highbits -= right.highbits;
       if (diff > lowbits) {
@@ -140,7 +140,7 @@ namespace orc {
      * @param right the number to multiply by
      * @return *this
      */
-    Int128& operator*=(const Int128 &right);
+    Int128& operator*=(const Int128& right);
 
     /**
      * Divide this number by right and return the result. This operation is
@@ -154,14 +154,14 @@ namespace orc {
      * @param right the number to divide by
      * @param remainder the remainder after the division
      */
-    Int128 divide(const Int128 &right, Int128& remainder) const;
+    Int128 divide(const Int128& right, Int128& remainder) const;
 
     /**
      * Logical or between two Int128.
      * @param right the number to or in
      * @return *this
      */
-    Int128& operator|=(const Int128 &right) {
+    Int128& operator|=(const Int128& right) {
       lowbits |= right.lowbits;
       highbits |= right.highbits;
       return *this;
@@ -172,7 +172,7 @@ namespace orc {
      * @param right the number to and in
      * @return *this
      */
-    Int128& operator&=(const Int128 &right) {
+    Int128& operator&=(const Int128& right) {
       lowbits &= right.lowbits;
       highbits &= right.highbits;
       return *this;
@@ -183,7 +183,7 @@ namespace orc {
      * @param right the number to and in
      * @return logical and result
      */
-    Int128 operator&(const Int128 &right) {
+    Int128 operator&(const Int128& right) {
       Int128 value = *this;
       value &= right;
       return value;
@@ -219,8 +219,7 @@ namespace orc {
         if (bits < 64) {
           lowbits >>= bits;
           lowbits |= static_cast<uint64_t>(highbits << (64 - bits));
-          highbits = static_cast<int64_t>
-            (static_cast<uint64_t>(highbits) >> bits);
+          highbits = static_cast<int64_t>(static_cast<uint64_t>(highbits) >> bits);
         } else if (bits < 128) {
           lowbits = static_cast<uint64_t>(highbits >> (bits - 64));
           highbits = highbits >= 0 ? 0 : -1l;
@@ -240,7 +239,7 @@ namespace orc {
       return highbits != right.highbits || lowbits != right.lowbits;
     }
 
-    bool operator<(const Int128 &right) const {
+    bool operator<(const Int128& right) const {
       if (highbits == right.highbits) {
         return lowbits < right.lowbits;
       } else {
@@ -248,7 +247,7 @@ namespace orc {
       }
     }
 
-    bool operator<=(const Int128 &right) const {
+    bool operator<=(const Int128& right) const {
       if (highbits == right.highbits) {
         return lowbits <= right.lowbits;
       } else {
@@ -256,7 +255,7 @@ namespace orc {
       }
     }
 
-    bool operator>(const Int128 &right) const {
+    bool operator>(const Int128& right) const {
       if (highbits == right.highbits) {
         return lowbits > right.lowbits;
       } else {
@@ -264,7 +263,7 @@ namespace orc {
       }
     }
 
-    bool operator>=(const Int128 &right) const {
+    bool operator>=(const Int128& right) const {
       if (highbits == right.highbits) {
         return lowbits >= right.lowbits;
       } else {
@@ -273,10 +272,8 @@ namespace orc {
     }
 
     uint32_t hash() const {
-      return static_cast<uint32_t>(highbits >> 32) ^
-        static_cast<uint32_t>(highbits) ^
-        static_cast<uint32_t>(lowbits >> 32) ^
-        static_cast<uint32_t>(lowbits);
+      return static_cast<uint32_t>(highbits >> 32) ^ static_cast<uint32_t>(highbits) ^
+             static_cast<uint32_t>(lowbits >> 32) ^ static_cast<uint32_t>(lowbits);
     }
 
     /**
@@ -284,17 +281,17 @@ namespace orc {
      */
     bool fitsInLong() const {
       switch (highbits) {
-      case 0:
-        return 0 == (lowbits & LONG_SIGN_BIT);
-      case -1:
-        return 0 != (lowbits & LONG_SIGN_BIT);
-      default:
-        return false;
+        case 0:
+          return 0 == (lowbits & LONG_SIGN_BIT);
+        case -1:
+          return 0 != (lowbits & LONG_SIGN_BIT);
+        default:
+          return false;
       }
     }
 
     /**
-     * Convert the value to a long and
+     * Convert the value to a long and throw std::range_error on overflow.
      */
     int64_t toLong() const {
       if (fitsInLong()) {
@@ -304,6 +301,11 @@ namespace orc {
     }
 
     /**
+     * Convert the value to a double; the return value may not be precise.
+     */
+    double toDouble() const;
+
+    /**
      * Return the base 10 string representation of the integer.
      */
     std::string toString() const;
@@ -316,8 +318,7 @@ namespace orc {
      * @param trimTrailingZeros whether or not to trim trailing zeros
      * @return converted string representation
      */
-    std::string toDecimalString(int32_t scale = 0,
-                                bool trimTrailingZeros = false) const;
+    std::string toDecimalString(int32_t scale = 0, bool trimTrailingZeros = false) const;
 
     /**
      * Return the base 16 string representation of the two's complement with
@@ -329,14 +330,14 @@ namespace orc {
     /**
      * Get the high bits of the twos complement representation of the number.
      */
-    int64_t getHighBits() {
+    int64_t getHighBits() const {
       return highbits;
     }
 
     /**
      * Get the low bits of the twos complement representation of the number.
      */
-    uint64_t getLowBits() {
+    uint64_t getLowBits() const {
       return lowbits;
     }
 
@@ -347,15 +348,14 @@ namespace orc {
      * @param wasNegative set to true if the original number was negative
      * @return the number of elements that were set in the array (1 to 4)
      */
-    int64_t fillInArray(uint32_t* array, bool &wasNegative) const;
+    int64_t fillInArray(uint32_t* array, bool& wasNegative) const;
 
-  private:
+   private:
     static const uint64_t LONG_SIGN_BIT = 0x8000000000000000u;
     int64_t highbits;
     uint64_t lowbits;
   };
 
-
   /**
    * Scales up an Int128 value
    * @param value the Int128 value to scale
@@ -363,9 +363,7 @@ namespace orc {
    * @param overflow returns whether the result overflows or not
    * @return the scaled value
    */
-  Int128 scaleUpInt128ByPowerOfTen(Int128 value,
-                                   int32_t power,
-                                   bool &overflow);
+  Int128 scaleUpInt128ByPowerOfTen(Int128 value, int32_t power, bool& overflow);
   /**
    * Scales down an Int128 value
    * @param value the Int128 value to scale
@@ -373,5 +371,35 @@ namespace orc {
    * @return the scaled value
    */
   Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power);
-}
+
+  /**
+   * Converts decimal value to different precision/scale
+   * @param value the Int128 value to convert
+   * @param fromScale the scale of the value
+   * @param toPrecision the precision to convert to
+   * @param toScale the scale to convert to
+   * @param round whether to round the value or truncate
+   * @return whether the conversion overflows, and the converted value if it does not overflow
+   */
+  std::pair<bool, Int128> convertDecimal(Int128 value, int32_t fromScale, int32_t toPrecision,
+                                         int32_t toScale, bool round = true);
+
+  /**
+   * Converts a float value to decimal
+   * @param value the float value to convert
+   * @param precision the precision of the decimal
+   * @param scale the scale of the decimal
+   * @return whether the conversion overflows, and the converted value if it does not overflow
+   */
+  template <typename T>
+  std::enable_if_t<std::is_floating_point_v<T>, std::pair<bool, Int128>> convertDecimal(
+      T value, int32_t precision, int32_t scale);
+
+  extern template std::pair<bool, Int128> convertDecimal<float>(float value, int32_t precision,
+                                                                int32_t scale);
+
+  extern template std::pair<bool, Int128> convertDecimal<double>(double value, int32_t precision,
+                                                                 int32_t scale);
+
+}  // namespace orc
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh b/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh
index 71d76c438a..6d999d3aa8 100644
--- a/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh
@@ -19,15 +19,13 @@
 #ifndef MEMORYPOOL_HH_
 #define MEMORYPOOL_HH_
 
-#include "orc/orc-config.hh"
-#include "orc/Int128.hh"
-
 #include <memory>
-
+#include "orc/Int128.hh"
+#include "orc/orc-config.hh"
 namespace orc {
 
   class MemoryPool {
-  public:
+   public:
     virtual ~MemoryPool();
 
     virtual char* malloc(uint64_t size) = 0;
@@ -37,7 +35,7 @@ namespace orc {
 
   template <class T>
   class DataBuffer {
-  private:
+   private:
     MemoryPool& memoryPool;
     T* buf;
     // current size
@@ -49,10 +47,10 @@ namespace orc {
     DataBuffer(DataBuffer& buffer);
     DataBuffer& operator=(DataBuffer& buffer);
 
-  public:
+   public:
     DataBuffer(MemoryPool& pool, uint64_t _size = 0);
 
-    DataBuffer(DataBuffer<T>&& buffer) ORC_NOEXCEPT;
+    DataBuffer(DataBuffer<T>&& buffer) noexcept;
 
     virtual ~DataBuffer();
 
@@ -64,20 +62,25 @@ namespace orc {
       return buf;
     }
 
-    uint64_t size() {
+    uint64_t size() const {
       return currentSize;
     }
 
-    uint64_t capacity() {
+    uint64_t capacity() const {
       return currentCapacity;
     }
 
+    const T& operator[](uint64_t i) const {
+      return buf[i];
+    }
+
     T& operator[](uint64_t i) {
       return buf[i];
     }
 
     void reserve(uint64_t _size);
     void resize(uint64_t _size);
+    void zeroOut();
   };
 
   // Specializations for char
@@ -104,6 +107,14 @@ namespace orc {
   template <>
   void DataBuffer<double>::resize(uint64_t newSize);
 
+  // Specializations for float
+
+  template <>
+  DataBuffer<float>::~DataBuffer();
+
+  template <>
+  void DataBuffer<float>::resize(uint64_t newSize);
+
   // Specializations for int64_t
 
   template <>
@@ -112,6 +123,30 @@ namespace orc {
   template <>
   void DataBuffer<int64_t>::resize(uint64_t newSize);
 
+  // Specializations for int32_t
+
+  template <>
+  DataBuffer<int32_t>::~DataBuffer();
+
+  template <>
+  void DataBuffer<int32_t>::resize(uint64_t newSize);
+
+  // Specializations for int16_t
+
+  template <>
+  DataBuffer<int16_t>::~DataBuffer();
+
+  template <>
+  void DataBuffer<int16_t>::resize(uint64_t newSize);
+
+  // Specializations for int8_t
+
+  template <>
+  DataBuffer<int8_t>::~DataBuffer();
+
+  template <>
+  void DataBuffer<int8_t>::resize(uint64_t newSize);
+
   // Specializations for uint64_t
 
   template <>
@@ -128,23 +163,31 @@ namespace orc {
   template <>
   void DataBuffer<unsigned char>::resize(uint64_t newSize);
 
-  #ifdef __clang__
-    #pragma clang diagnostic push
-    #pragma clang diagnostic ignored "-Wweak-template-vtables"
-  #endif
+  // Specializations for Int128
+
+  template <>
+  void DataBuffer<Int128>::zeroOut();
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wweak-template-vtables"
+#endif
 
   extern template class DataBuffer<char>;
   extern template class DataBuffer<char*>;
   extern template class DataBuffer<double>;
+  extern template class DataBuffer<float>;
   extern template class DataBuffer<Int128>;
   extern template class DataBuffer<int64_t>;
+  extern template class DataBuffer<int32_t>;
+  extern template class DataBuffer<int16_t>;
+  extern template class DataBuffer<int8_t>;
   extern template class DataBuffer<uint64_t>;
   extern template class DataBuffer<unsigned char>;
 
-  #ifdef __clang__
-    #pragma clang diagnostic pop
-  #endif
-} // namespace orc
-
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+}  // namespace orc
 
 #endif /* MEMORYPOOL_HH_ */
diff --git a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
index c64853168a..6e4a07bf7c 100644
--- a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
@@ -21,9 +21,9 @@
 
 #include <string>
 
-#include "orc/orc-config.hh"
 #include "orc/Reader.hh"
 #include "orc/Writer.hh"
+#include "orc/orc-config.hh"
 
 /** /file orc/OrcFile.hh
     @brief The top level interface to ORC.
@@ -35,7 +35,7 @@ namespace orc {
    * An abstract interface for providing ORC readers a stream of bytes.
    */
   class InputStream {
-  public:
+   public:
     virtual ~InputStream();
 
     /**
@@ -56,9 +56,7 @@ namespace orc {
      * @param length the number of bytes to read.
      * @param offset the position in the stream to read from.
      */
-    virtual void read(void* buf,
-                      uint64_t length,
-                      uint64_t offset) = 0;
+    virtual void read(void* buf, uint64_t length, uint64_t offset) = 0;
 
     /**
      * Get the name of the stream for error messages.
@@ -70,7 +68,7 @@ namespace orc {
    * An abstract interface for providing ORC writer a stream of bytes.
    */
   class OutputStream {
-  public:
+   public:
     virtual ~OutputStream();
 
     /**
@@ -100,38 +98,50 @@ namespace orc {
      * Close the stream and flush any pending data to the disk.
      */
     virtual void close() = 0;
+
+    /**
+     * Flush any pending data to the disk.
+     */
+    virtual void flush() {
+      throw NotImplementedYet("Not supported");
+    }
   };
 
   /**
    * Create a stream to a local file or HDFS file if path begins with "hdfs://"
    * @param path the name of the file in the local file system or HDFS
+   * @param metrics the metrics of the reader
    */
-  ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path);
+  std::unique_ptr<InputStream> readFile(const std::string& path, ReaderMetrics* metrics = nullptr);
 
   /**
    * Create a stream to a local file.
    * @param path the name of the file in the local file system
+   * @param metrics the metrics of the reader
    */
-  ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path);
+  std::unique_ptr<InputStream> readLocalFile(const std::string& path,
+                                             ReaderMetrics* metrics = nullptr);
 
   /**
    * Create a stream to an HDFS file.
    * @param path the uri of the file in HDFS
+   * @param metrics the metrics of the reader
    */
-  ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path);
+  std::unique_ptr<InputStream> readHdfsFile(const std::string& path,
+                                            ReaderMetrics* metrics = nullptr);
 
   /**
    * Create a reader to read the ORC file.
    * @param stream the stream to read
    * @param options the options for reading the file
    */
-  ORC_UNIQUE_PTR<Reader> createReader(ORC_UNIQUE_PTR<InputStream> stream,
-                                      const ReaderOptions& options);
+  std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream,
+                                       const ReaderOptions& options);
   /**
    * Create a stream to write to a local file.
    * @param path the name of the file in the local file system
    */
-  ORC_UNIQUE_PTR<OutputStream> writeLocalFile(const std::string& path);
+  std::unique_ptr<OutputStream> writeLocalFile(const std::string& path);
 
   /**
    * Create a writer to write the ORC file.
@@ -139,10 +149,8 @@ namespace orc {
    * @param stream the stream to write to
    * @param options the options for writing the file
    */
-  ORC_UNIQUE_PTR<Writer> createWriter(
-                                      const Type& type,
-                                      OutputStream* stream,
-                                      const WriterOptions& options);
-}
+  std::unique_ptr<Writer> createWriter(const Type& type, OutputStream* stream,
+                                       const WriterOptions& options);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Reader.hh b/contrib/libs/apache/orc/c++/include/orc/Reader.hh
index ddc8b55055..b631c2c6ea 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Reader.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Reader.hh
@@ -21,12 +21,13 @@
 
 #include "orc/BloomFilter.hh"
 #include "orc/Common.hh"
-#include "orc/orc-config.hh"
 #include "orc/Statistics.hh"
-#include "orc/sargs/SearchArgument.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
+#include "orc/orc-config.hh"
+#include "orc/sargs/SearchArgument.hh"
 
+#include <atomic>
 #include <map>
 #include <memory>
 #include <set>
@@ -40,13 +41,35 @@ namespace orc {
   struct RowReaderOptionsPrivate;
 
   /**
+   * Expose the reader metrics including the latency and
+   * number of calls of the decompression/decoding/IO modules.
+   */
+  struct ReaderMetrics {
+    std::atomic<uint64_t> ReaderCall{0};
+    // ReaderInclusiveLatencyUs contains the latency of
+    // the decompression/decoding/IO modules.
+    std::atomic<uint64_t> ReaderInclusiveLatencyUs{0};
+    std::atomic<uint64_t> DecompressionCall{0};
+    std::atomic<uint64_t> DecompressionLatencyUs{0};
+    std::atomic<uint64_t> DecodingCall{0};
+    std::atomic<uint64_t> DecodingLatencyUs{0};
+    std::atomic<uint64_t> ByteDecodingCall{0};
+    std::atomic<uint64_t> ByteDecodingLatencyUs{0};
+    std::atomic<uint64_t> IOCount{0};
+    std::atomic<uint64_t> IOBlockingLatencyUs{0};
+    std::atomic<uint64_t> SelectedRowGroupCount{0};
+    std::atomic<uint64_t> EvaluatedRowGroupCount{0};
+  };
+  ReaderMetrics* getDefaultReaderMetrics();
+
+  /**
    * Options for creating a Reader.
    */
   class ReaderOptions {
-  private:
-    ORC_UNIQUE_PTR<ReaderOptionsPrivate> privateBits;
+   private:
+    std::unique_ptr<ReaderOptionsPrivate> privateBits;
 
-  public:
+   public:
     ReaderOptions();
     ReaderOptions(const ReaderOptions&);
     ReaderOptions(ReaderOptions&);
@@ -77,6 +100,14 @@ namespace orc {
     ReaderOptions& setMemoryPool(MemoryPool& pool);
 
     /**
+     * Set the reader metrics.
+     *
+     * Defaults to nullptr.
+     * When set to nullptr, the reader metrics will be disabled.
+     */
+    ReaderOptions& setReaderMetrics(ReaderMetrics* metrics);
+
+    /**
      * Set the location of the tail as defined by the logical length of the
      * file.
      */
@@ -102,16 +133,21 @@ namespace orc {
      * Get the memory allocator.
      */
     MemoryPool* getMemoryPool() const;
+
+    /**
+     * Get the reader metrics.
+     */
+    ReaderMetrics* getReaderMetrics() const;
   };
 
   /**
    * Options for creating a RowReader.
    */
   class RowReaderOptions {
-  private:
-    ORC_UNIQUE_PTR<RowReaderOptionsPrivate> privateBits;
+   private:
+    std::unique_ptr<RowReaderOptionsPrivate> privateBits;
 
-  public:
+   public:
     RowReaderOptions();
     RowReaderOptions(const RowReaderOptions&);
     RowReaderOptions(RowReaderOptions&);
@@ -164,8 +200,7 @@ namespace orc {
      * @param idReadIntentMap a map of IdReadIntentMap.
      * @return this
      */
-    RowReaderOptions&
-    includeTypesWithIntents(const IdReadIntentMap& idReadIntentMap);
+    RowReaderOptions& includeTypesWithIntents(const IdReadIntentMap& idReadIntentMap);
 
     /**
      * Set the section of the file to process.
@@ -289,8 +324,39 @@ namespace orc {
      * Get the IdReadIntentMap map that was supplied by client.
      */
     const IdReadIntentMap getIdReadIntentMap() const;
-  };
 
+    /**
+     * Set whether to use fixed width numeric vectorBatch, such as int32_t / int16_t / int8_t /
+     * float vectorBatch.
+     */
+    RowReaderOptions& setUseTightNumericVector(bool useTightNumericVector);
+
+    /**
+     * Get whether or not to use fixed width numeric columnVectorBatch.
+     * @return if not set, the default is false
+     */
+    bool getUseTightNumericVector() const;
+
+    /**
+     * Set read type for schema evolution
+     */
+    RowReaderOptions& setReadType(std::shared_ptr<Type> type);
+
+    /**
+     * Get read type for schema evolution
+     */
+    std::shared_ptr<Type>& getReadType() const;
+
+    /**
+     * Set whether reader throws or returns null when value overflows for schema evolution.
+     */
+    RowReaderOptions& throwOnSchemaEvolutionOverflow(bool shouldThrow);
+
+    /**
+     * Whether reader throws or returns null when value overflows for schema evolution.
+     */
+    bool getThrowOnSchemaEvolutionOverflow() const;
+  };
 
   class RowReader;
 
@@ -299,7 +365,7 @@ namespace orc {
    * This is an an abstract class that will be subclassed as necessary.
    */
   class Reader {
-  public:
+   public:
     virtual ~Reader();
 
     /**
@@ -389,8 +455,7 @@ namespace orc {
      * @param stripeIndex the index of the stripe (0 to N-1) to get information about
      * @return the information about that stripe
      */
-    virtual ORC_UNIQUE_PTR<StripeInformation>
-    getStripe(uint64_t stripeIndex) const = 0;
+    virtual std::unique_ptr<StripeInformation> getStripe(uint64_t stripeIndex) const = 0;
 
     /**
      * Get the number of stripe statistics in the file.
@@ -403,8 +468,7 @@ namespace orc {
      * @param stripeIndex the index of the stripe (0 to N-1) to get statistics about
      * @return the statistics about that stripe
      */
-    virtual ORC_UNIQUE_PTR<StripeStatistics>
-    getStripeStatistics(uint64_t stripeIndex) const = 0;
+    virtual std::unique_ptr<StripeStatistics> getStripeStatistics(uint64_t stripeIndex) const = 0;
 
     /**
      * Get the length of the data stripes in the file.
@@ -440,15 +504,14 @@ namespace orc {
      * Get the statistics about the columns in the file.
      * @return the information about the column
      */
-    virtual ORC_UNIQUE_PTR<Statistics> getStatistics() const = 0;
+    virtual std::unique_ptr<Statistics> getStatistics() const = 0;
 
     /**
      * Get the statistics about a single column in the file.
      * @param columnId id of the column
      * @return the information about the column
      */
-    virtual ORC_UNIQUE_PTR<ColumnStatistics>
-    getColumnStatistics(uint32_t columnId) const = 0;
+    virtual std::unique_ptr<ColumnStatistics> getColumnStatistics(uint32_t columnId) const = 0;
 
     /**
      * Check if the file has correct column statistics.
@@ -456,6 +519,12 @@ namespace orc {
     virtual bool hasCorrectStatistics() const = 0;
 
     /**
+     * Get metrics of the reader
+     * @return the reader metrics accumulated up to the current state.
+     */
+    virtual const ReaderMetrics* getReaderMetrics() const = 0;
+
+    /**
      * Get the serialized file tail.
      * Usefull if another reader of the same file wants to avoid re-reading
      * the file tail. See ReaderOptions.setSerializedFileTail().
@@ -474,14 +543,14 @@ namespace orc {
      * Create a RowReader based on this reader with the default options.
      * @return a RowReader to read the rows
      */
-    virtual ORC_UNIQUE_PTR<RowReader> createRowReader() const = 0;
+    virtual std::unique_ptr<RowReader> createRowReader() const = 0;
 
     /**
      * Create a RowReader based on this reader.
      * @param options RowReader Options
      * @return a RowReader to read the rows
      */
-    virtual ORC_UNIQUE_PTR<RowReader> createRowReader(const RowReaderOptions& options) const = 0;
+    virtual std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options) const = 0;
 
     /**
      * Get the name of the input stream.
@@ -493,13 +562,13 @@ namespace orc {
      * based on the information in the file footer.
      * The bound is less tight if only few columns are read or compression is
      * used.
-    */
+     */
     /**
      * @param stripeIx index of the stripe to be read (if not specified,
      *        all stripes are considered).
      * @return upper bound on memory use by all columns
      */
-    virtual uint64_t getMemoryUse(int stripeIx=-1) = 0;
+    virtual uint64_t getMemoryUse(int stripeIx = -1) = 0;
 
     /**
      * @param include Column Field Ids
@@ -507,7 +576,8 @@ namespace orc {
      *        all stripes are considered).
      * @return upper bound on memory use by selected columns
      */
-    virtual uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) = 0;
+    virtual uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include,
+                                           int stripeIx = -1) = 0;
 
     /**
      * @param names Column Names
@@ -515,7 +585,7 @@ namespace orc {
      *        all stripes are considered).
      * @return upper bound on memory use by selected columns
      */
-    virtual uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) = 0;
+    virtual uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx = -1) = 0;
 
     /**
      * @param include Column Type Ids
@@ -523,7 +593,8 @@ namespace orc {
      *        all stripes are considered).
      * @return upper bound on memory use by selected columns
      */
-    virtual uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) = 0;
+    virtual uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include,
+                                          int stripeIx = -1) = 0;
 
     /**
      * Get BloomFiters of all selected columns in the specified stripe
@@ -532,8 +603,8 @@ namespace orc {
      *        all columns that have bloom filters are considered).
      * @return map of bloom filters with the key standing for the index of column.
      */
-    virtual std::map<uint32_t, BloomFilterIndex>
-    getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const = 0;
+    virtual std::map<uint32_t, BloomFilterIndex> getBloomFilters(
+        uint32_t stripeIndex, const std::set<uint32_t>& included) const = 0;
   };
 
   /**
@@ -541,7 +612,7 @@ namespace orc {
    * This is an an abstract class that will be subclassed as necessary.
    */
   class RowReader {
-  public:
+   public:
     virtual ~RowReader();
     /**
      * Get the selected type of the rows in the file. The file's row type
@@ -563,8 +634,7 @@ namespace orc {
      * @param size the number of rows to read
      * @return a new ColumnVectorBatch to read into
      */
-    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size
-                                                             ) const = 0;
+    virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) const = 0;
 
     /**
      * Read the next row batch from the current position.
@@ -587,8 +657,7 @@ namespace orc {
      * @param rowNumber the next row the reader should return
      */
     virtual void seekToRow(uint64_t rowNumber) = 0;
-
   };
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Statistics.hh b/contrib/libs/apache/orc/c++/include/orc/Statistics.hh
index 4d7caeab3d..4ba8c35f7d 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Statistics.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Statistics.hh
@@ -19,9 +19,11 @@
 #ifndef ORC_STATISTICS_HH
 #define ORC_STATISTICS_HH
 
-#include "orc/orc-config.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
+#include "orc/orc-config.hh"
+
+#include <sstream>
 
 namespace orc {
 
@@ -29,7 +31,7 @@ namespace orc {
    * Statistics that are available for all types of columns.
    */
   class ColumnStatistics {
-  public:
+   public:
     virtual ~ColumnStatistics();
 
     /**
@@ -54,9 +56,9 @@ namespace orc {
   /**
    * Statistics for binary columns.
    */
-  class BinaryColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~BinaryColumnStatistics();
+  class BinaryColumnStatistics : public ColumnStatistics {
+   public:
+    ~BinaryColumnStatistics() override;
 
     /**
      * Check whether column has total length.
@@ -70,9 +72,9 @@ namespace orc {
   /**
    * Statistics for boolean columns.
    */
-  class BooleanColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~BooleanColumnStatistics();
+  class BooleanColumnStatistics : public ColumnStatistics {
+   public:
+    ~BooleanColumnStatistics() override;
 
     /**
      * Check whether column has true/false count.
@@ -87,9 +89,9 @@ namespace orc {
   /**
    * Statistics for date columns.
    */
-  class DateColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~DateColumnStatistics();
+  class DateColumnStatistics : public ColumnStatistics {
+   public:
+    ~DateColumnStatistics() override;
 
     /**
      * Check whether column has minimum.
@@ -119,9 +121,9 @@ namespace orc {
   /**
    * Statistics for decimal columns.
    */
-  class DecimalColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~DecimalColumnStatistics();
+  class DecimalColumnStatistics : public ColumnStatistics {
+   public:
+    ~DecimalColumnStatistics() override;
 
     /**
      * Check whether column has minimum.
@@ -163,9 +165,9 @@ namespace orc {
   /**
    * Statistics for float and double columns.
    */
-  class DoubleColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~DoubleColumnStatistics();
+  class DoubleColumnStatistics : public ColumnStatistics {
+   public:
+    ~DoubleColumnStatistics() override;
 
     /**
      * Check whether column has minimum.
@@ -210,9 +212,9 @@ namespace orc {
    * Statistics for all of the integer columns, such as byte, short, int, and
    * long.
    */
-  class IntegerColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~IntegerColumnStatistics();
+  class IntegerColumnStatistics : public ColumnStatistics {
+   public:
+    ~IntegerColumnStatistics() override;
 
     /**
      * Check whether column has minimum.
@@ -256,9 +258,9 @@ namespace orc {
   /**
    * Statistics for string columns.
    */
-  class StringColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~StringColumnStatistics();
+  class StringColumnStatistics : public ColumnStatistics {
+   public:
+    ~StringColumnStatistics() override;
 
     /**
      * Check whether column has minimum.
@@ -282,13 +284,13 @@ namespace orc {
      * Get the minimum value for the column.
      * @return minimum value
      */
-    virtual const std::string & getMinimum() const = 0;
+    virtual const std::string& getMinimum() const = 0;
 
     /**
      * Get the maximum value for the column.
      * @return maximum value
      */
-    virtual const std::string & getMaximum() const = 0;
+    virtual const std::string& getMaximum() const = 0;
 
     /**
      * Get the total length of all values.
@@ -300,9 +302,9 @@ namespace orc {
   /**
    * Statistics for timestamp columns.
    */
-  class TimestampColumnStatistics: public ColumnStatistics {
-  public:
-    virtual ~TimestampColumnStatistics();
+  class TimestampColumnStatistics : public ColumnStatistics {
+   public:
+    ~TimestampColumnStatistics() override;
 
     /**
      * Check whether minimum timestamp exists.
@@ -366,7 +368,7 @@ namespace orc {
   };
 
   class Statistics {
-  public:
+   public:
     virtual ~Statistics();
 
     /**
@@ -374,8 +376,7 @@ namespace orc {
      * @param colId id of the column
      * @return one column's statistics
      */
-    virtual const ColumnStatistics* getColumnStatistics(uint32_t colId
-                                                        ) const = 0;
+    virtual const ColumnStatistics* getColumnStatistics(uint32_t colId) const = 0;
 
     /**
      * Get the number of columns.
@@ -388,8 +389,8 @@ namespace orc {
    * Statistics for all of collections such as Map and List.
    */
   class CollectionColumnStatistics : public ColumnStatistics {
-  public:
-    virtual ~CollectionColumnStatistics();
+   public:
+    ~CollectionColumnStatistics() override;
 
     /**
      * check whether column has minimum number of children
@@ -453,8 +454,8 @@ namespace orc {
   };
 
   class StripeStatistics : public Statistics {
-  public:
-    virtual ~StripeStatistics();
+   public:
+    ~StripeStatistics() override;
 
     /**
      * Get the statistics of a given RowIndex entry in a given column.
@@ -462,9 +463,8 @@ namespace orc {
      * @param rowIndexId RowIndex entry id
      * @return statistics of the given RowIndex entry
      */
-    virtual const ColumnStatistics*
-                      getRowIndexStatistics(
-                          uint32_t columnId, uint32_t rowIndexId) const = 0;
+    virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
+                                                          uint32_t rowIndexId) const = 0;
 
     /**
      * Get the number of RowIndex statistics in a given column.
@@ -473,6 +473,6 @@ namespace orc {
      */
     virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0;
   };
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Type.hh b/contrib/libs/apache/orc/c++/include/orc/Type.hh
index a7df8307e6..82e0e3cc86 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Type.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Type.hh
@@ -19,9 +19,9 @@
 #ifndef ORC_TYPE_HH
 #define ORC_TYPE_HH
 
-#include "orc/orc-config.hh"
-#include "orc/Vector.hh"
 #include "MemoryPool.hh"
+#include "orc/Vector.hh"
+#include "orc/orc-config.hh"
 
 namespace orc {
 
@@ -48,7 +48,7 @@ namespace orc {
   };
 
   class Type {
-  public:
+   public:
     virtual ~Type();
     virtual uint64_t getColumnId() const = 0;
     virtual uint64_t getMaximumColumnId() const = 0;
@@ -59,21 +59,28 @@ namespace orc {
     virtual uint64_t getMaximumLength() const = 0;
     virtual uint64_t getPrecision() const = 0;
     virtual uint64_t getScale() const = 0;
-    virtual Type& setAttribute(const std::string& key,
-                               const std::string& value) = 0;
+    virtual Type& setAttribute(const std::string& key, const std::string& value) = 0;
     virtual bool hasAttributeKey(const std::string& key) const = 0;
     virtual Type& removeAttribute(const std::string& key) = 0;
     virtual std::vector<std::string> getAttributeKeys() const = 0;
     virtual std::string getAttributeValue(const std::string& key) const = 0;
     virtual std::string toString() const = 0;
+    /**
+     * Get the Type with the given column ID
+     * @param colId the column ID
+     * @return the type corresponding to the column ID, or nullptr if it does not exist
+     */
+    virtual const Type* getTypeByColumnId(uint64_t colId) const = 0;
 
     /**
      * Create a row batch for this type.
      */
-    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size,
-                                                             MemoryPool& pool,
-                                                             bool encoded = false
-                                                             ) const = 0;
+    virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size, MemoryPool& pool,
+                                                              bool encoded = false) const = 0;
+
+    virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size, MemoryPool& pool,
+                                                              bool encoded,
+                                                              bool useTightNumericVector) const = 0;
 
     /**
      * Add a new field to a struct type.
@@ -81,38 +88,33 @@ namespace orc {
      * @param fieldType the type of the new field
      * @return a reference to the struct type
      */
-    virtual Type* addStructField(const std::string& fieldName,
-                                 ORC_UNIQUE_PTR<Type> fieldType) = 0;
+    virtual Type* addStructField(const std::string& fieldName, std::unique_ptr<Type> fieldType) = 0;
 
     /**
      * Add a new child to a union type.
      * @param fieldType the type of the new field
      * @return a reference to the union type
      */
-    virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0;
+    virtual Type* addUnionChild(std::unique_ptr<Type> fieldType) = 0;
 
     /**
      * Build a Type object from string text representation.
      */
-    static ORC_UNIQUE_PTR<Type> buildTypeFromString(const std::string& input);
+    static std::unique_ptr<Type> buildTypeFromString(const std::string& input);
   };
 
   const int64_t DEFAULT_DECIMAL_SCALE = 18;
   const int64_t DEFAULT_DECIMAL_PRECISION = 38;
 
-  ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind);
-  ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind,
-                                      uint64_t maxLength);
-  ORC_UNIQUE_PTR<Type>
-                createDecimalType(uint64_t precision=
-                                    DEFAULT_DECIMAL_PRECISION,
-                                  uint64_t scale=DEFAULT_DECIMAL_SCALE);
+  std::unique_ptr<Type> createPrimitiveType(TypeKind kind);
+  std::unique_ptr<Type> createCharType(TypeKind kind, uint64_t maxLength);
+  std::unique_ptr<Type> createDecimalType(uint64_t precision = DEFAULT_DECIMAL_PRECISION,
+                                          uint64_t scale = DEFAULT_DECIMAL_SCALE);
 
-  ORC_UNIQUE_PTR<Type> createStructType();
-  ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements);
-  ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key,
-                                      ORC_UNIQUE_PTR<Type> value);
-  ORC_UNIQUE_PTR<Type> createUnionType();
+  std::unique_ptr<Type> createStructType();
+  std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements);
+  std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, std::unique_ptr<Type> value);
+  std::unique_ptr<Type> createUnionType();
 
-}
+}  // namespace orc
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Vector.hh b/contrib/libs/apache/orc/c++/include/orc/Vector.hh
index 752e1af78a..0dfe926965 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Vector.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Vector.hh
@@ -19,17 +19,17 @@
 #ifndef ORC_VECTOR_HH
 #define ORC_VECTOR_HH
 
-#include "orc/orc-config.hh"
-#include "MemoryPool.hh"
 #include "Int128.hh"
+#include "MemoryPool.hh"
+#include "orc/orc-config.hh"
 
+#include <cstdlib>
+#include <cstring>
 #include <list>
 #include <memory>
-#include <cstring>
-#include <vector>
+#include <sstream>
 #include <stdexcept>
-#include <cstdlib>
-#include <iostream>
+#include <vector>
 
 namespace orc {
 
@@ -37,6 +37,11 @@ namespace orc {
    * The base class for each of the column vectors. This class handles
    * the generic attributes such as number of elements, capacity, and
    * notNull vector.
+   * Note: If hasNull is false, the values in the notNull buffer are not required.
+   * On the writer side, it does not read values from notNull buffer so users are
+   * not expected to write notNull buffer if hasNull is false. On the reader side,
+   * it does not set notNull buffer if hasNull is false, meaning that it is undefined
+   * behavior to consume values from notNull buffer in this case by downstream users.
    */
   struct ColumnVectorBatch {
     ColumnVectorBatch(uint64_t capacity, MemoryPool& pool);
@@ -83,40 +88,128 @@ namespace orc {
      */
     virtual bool hasVariableLength();
 
-  private:
+   private:
     ColumnVectorBatch(const ColumnVectorBatch&);
     ColumnVectorBatch& operator=(const ColumnVectorBatch&);
   };
 
-  struct LongVectorBatch: public ColumnVectorBatch {
-    LongVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~LongVectorBatch();
+  template <typename ValueType>
+  struct IntegerVectorBatch : public ColumnVectorBatch {
+    IntegerVectorBatch(uint64_t cap, MemoryPool& pool)
+        : ColumnVectorBatch(cap, pool), data(pool, cap) {
+      // PASS
+    }
+
+    ~IntegerVectorBatch() override = default;
 
-    DataBuffer<int64_t> data;
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+    inline std::string toString() const override;
+
+    void resize(uint64_t cap) override {
+      if (capacity < cap) {
+        ColumnVectorBatch::resize(cap);
+        data.resize(cap);
+      }
+    }
+
+    void clear() override {
+      numElements = 0;
+    }
+
+    uint64_t getMemoryUsage() override {
+      return ColumnVectorBatch::getMemoryUsage() +
+             static_cast<uint64_t>(data.capacity() * sizeof(ValueType));
+    }
+
+    DataBuffer<ValueType> data;
   };
 
-  struct DoubleVectorBatch: public ColumnVectorBatch {
-    DoubleVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~DoubleVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+  using LongVectorBatch = IntegerVectorBatch<int64_t>;
+  using IntVectorBatch = IntegerVectorBatch<int32_t>;
+  using ShortVectorBatch = IntegerVectorBatch<int16_t>;
+  using ByteVectorBatch = IntegerVectorBatch<int8_t>;
+
+  template <>
+  inline std::string LongVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Long vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  template <>
+  inline std::string IntVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Int vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  template <>
+  inline std::string ShortVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Short vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  template <>
+  inline std::string ByteVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Byte vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  template <typename FloatType>
+  struct FloatingVectorBatch : public ColumnVectorBatch {
+    FloatingVectorBatch(uint64_t cap, MemoryPool& pool)
+        : ColumnVectorBatch(cap, pool), data(pool, cap) {
+      // PASS
+    }
 
-    DataBuffer<double> data;
+    ~FloatingVectorBatch() override = default;
+
+    inline std::string toString() const override;
+
+    void resize(uint64_t cap) override {
+      if (capacity < cap) {
+        ColumnVectorBatch::resize(cap);
+        data.resize(cap);
+      }
+    }
+
+    void clear() override {
+      numElements = 0;
+    }
+
+    uint64_t getMemoryUsage() override {
+      return ColumnVectorBatch::getMemoryUsage() +
+             static_cast<uint64_t>(data.capacity() * sizeof(FloatType));
+    }
+
+    DataBuffer<FloatType> data;
   };
 
-  struct StringVectorBatch: public ColumnVectorBatch {
+  using DoubleVectorBatch = FloatingVectorBatch<double>;
+  using FloatVectorBatch = FloatingVectorBatch<float>;
+
+  template <>
+  inline std::string DoubleVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Double vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  template <>
+  inline std::string FloatVectorBatch::toString() const {
+    std::ostringstream buffer;
+    buffer << "Float vector <" << numElements << " of " << capacity << ">";
+    return buffer.str();
+  }
+
+  struct StringVectorBatch : public ColumnVectorBatch {
     StringVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~StringVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+    ~StringVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
 
     // pointers to the start of each string
     DataBuffer<char*> data;
@@ -152,35 +245,35 @@ namespace orc {
    */
   struct EncodedStringVectorBatch : public StringVectorBatch {
     EncodedStringVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~EncodedStringVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
+    ~EncodedStringVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
     std::shared_ptr<StringDictionary> dictionary;
 
     // index for dictionary entry
     DataBuffer<int64_t> index;
   };
 
-  struct StructVectorBatch: public ColumnVectorBatch {
+  struct StructVectorBatch : public ColumnVectorBatch {
     StructVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~StructVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
-    bool hasVariableLength();
+    ~StructVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
+    bool hasVariableLength() override;
 
     std::vector<ColumnVectorBatch*> fields;
   };
 
-  struct ListVectorBatch: public ColumnVectorBatch {
+  struct ListVectorBatch : public ColumnVectorBatch {
     ListVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~ListVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
-    bool hasVariableLength();
+    ~ListVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
+    bool hasVariableLength() override;
 
     /**
      * The offset of the first element of each list.
@@ -189,17 +282,17 @@ namespace orc {
     DataBuffer<int64_t> offsets;
 
     // the concatenated elements
-    ORC_UNIQUE_PTR<ColumnVectorBatch> elements;
+    std::unique_ptr<ColumnVectorBatch> elements;
   };
 
-  struct MapVectorBatch: public ColumnVectorBatch {
+  struct MapVectorBatch : public ColumnVectorBatch {
     MapVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~MapVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
-    bool hasVariableLength();
+    ~MapVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
+    bool hasVariableLength() override;
 
     /**
      * The offset of the first element of each map.
@@ -208,19 +301,19 @@ namespace orc {
     DataBuffer<int64_t> offsets;
 
     // the concatenated keys
-    ORC_UNIQUE_PTR<ColumnVectorBatch> keys;
+    std::unique_ptr<ColumnVectorBatch> keys;
     // the concatenated elements
-    ORC_UNIQUE_PTR<ColumnVectorBatch> elements;
+    std::unique_ptr<ColumnVectorBatch> elements;
   };
 
-  struct UnionVectorBatch: public ColumnVectorBatch {
+  struct UnionVectorBatch : public ColumnVectorBatch {
     UnionVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~UnionVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
-    bool hasVariableLength();
+    ~UnionVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
+    bool hasVariableLength() override;
 
     /**
      * For each value, which element of children has the value.
@@ -246,13 +339,13 @@ namespace orc {
     int32_t scale;
   };
 
-  struct Decimal64VectorBatch: public ColumnVectorBatch {
+  struct Decimal64VectorBatch : public ColumnVectorBatch {
     Decimal64VectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~Decimal64VectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+    ~Decimal64VectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
 
     // total number of digits
     int32_t precision;
@@ -262,7 +355,7 @@ namespace orc {
     // the numeric values
     DataBuffer<int64_t> values;
 
-  protected:
+   protected:
     /**
      * Contains the scales that were read from the file. Should NOT be
      * used.
@@ -272,13 +365,13 @@ namespace orc {
     friend class Decimal64ColumnWriter;
   };
 
-  struct Decimal128VectorBatch: public ColumnVectorBatch {
+  struct Decimal128VectorBatch : public ColumnVectorBatch {
     Decimal128VectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~Decimal128VectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+    ~Decimal128VectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
 
     // total number of digits
     int32_t precision;
@@ -288,7 +381,7 @@ namespace orc {
     // the numeric values
     DataBuffer<Int128> values;
 
-  protected:
+   protected:
     /**
      * Contains the scales that were read from the file. Should NOT be
      * used.
@@ -304,13 +397,13 @@ namespace orc {
    * The timestamps are stored split into the time_t value (seconds since
    * 1 Jan 1970 00:00:00) and the nanoseconds within the time_t value.
    */
-  struct TimestampVectorBatch: public ColumnVectorBatch {
+  struct TimestampVectorBatch : public ColumnVectorBatch {
     TimestampVectorBatch(uint64_t capacity, MemoryPool& pool);
-    virtual ~TimestampVectorBatch();
-    std::string toString() const;
-    void resize(uint64_t capacity);
-    void clear();
-    uint64_t getMemoryUsage();
+    ~TimestampVectorBatch() override;
+    std::string toString() const override;
+    void resize(uint64_t capacity) override;
+    void clear() override;
+    uint64_t getMemoryUsage() override;
 
     // the number of seconds past 1 Jan 1970 00:00 UTC (aka time_t)
     // Note that we always assume data is in GMT timezone; therefore it is
@@ -322,6 +415,6 @@ namespace orc {
     DataBuffer<int64_t> nanoseconds;
   };
 
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/Writer.hh b/contrib/libs/apache/orc/c++/include/orc/Writer.hh
index 78b0b97d25..047ee9ffc5 100644
--- a/contrib/libs/apache/orc/c++/include/orc/Writer.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/Writer.hh
@@ -20,10 +20,11 @@
 #define ORC_WRITER_HH
 
 #include "orc/Common.hh"
-#include "orc/orc-config.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
+#include "orc/orc-config.hh"
 
+#include <atomic>
 #include <memory>
 #include <set>
 #include <string>
@@ -34,26 +35,29 @@ namespace orc {
   // classes that hold data members so we can maintain binary compatibility
   struct WriterOptionsPrivate;
 
-  enum CompressionStrategy {
-    CompressionStrategy_SPEED = 0,
-    CompressionStrategy_COMPRESSION
-  };
+  enum CompressionStrategy { CompressionStrategy_SPEED = 0, CompressionStrategy_COMPRESSION };
 
-  enum RleVersion {
-    RleVersion_1 = 0,
-    RleVersion_2 = 1
-  };
+  enum RleVersion { RleVersion_1 = 0, RleVersion_2 = 1 };
 
   class Timezone;
 
   /**
+   * Expose the IO metrics for write operation.
+   */
+  struct WriterMetrics {
+    // Record the number of IO requests written to the output file
+    std::atomic<uint64_t> IOCount{0};
+    // Record the latency of IO blocking
+    std::atomic<uint64_t> IOBlockingLatencyUs{0};
+  };
+  /**
    * Options for creating a Writer.
    */
   class WriterOptions {
-  private:
-    ORC_UNIQUE_PTR<WriterOptionsPrivate> privateBits;
+   private:
+    std::unique_ptr<WriterOptionsPrivate> privateBits;
 
-  public:
+   public:
     WriterOptions();
     WriterOptions(const WriterOptions&);
     WriterOptions(WriterOptions&);
@@ -73,6 +77,8 @@ namespace orc {
 
     /**
      * Set the data compression block size.
+     * Should be less than 1 << 23 bytes (8 MB), a limit imposed by the
+     * 3-byte compression block header (1 bit for isOriginal and 23 bits for length).
      */
     WriterOptions& setCompressionBlockSize(uint64_t size);
 
@@ -83,7 +89,8 @@ namespace orc {
     uint64_t getCompressionBlockSize() const;
 
     /**
-     * Set row index stride (the number of rows per an entry in the row index). Use value 0 to disable row index.
+     * Set row index stride (the number of rows per an entry in the row index). Use value 0 to
+     * disable row index.
      */
     WriterOptions& setRowIndexStride(uint64_t stride);
 
@@ -157,13 +164,13 @@ namespace orc {
     /**
      * Set the memory pool.
      */
-    WriterOptions& setMemoryPool(MemoryPool * memoryPool);
+    WriterOptions& setMemoryPool(MemoryPool* memoryPool);
 
     /**
      * Get the memory pool.
      * @return if not set, return default memory pool.
      */
-    MemoryPool * getMemoryPool() const;
+    MemoryPool* getMemoryPool() const;
 
     /**
      * Set the error stream.
@@ -174,7 +181,7 @@ namespace orc {
      * Get the error stream.
      * @return if not set, return std::err.
      */
-    std::ostream * getErrorStream() const;
+    std::ostream* getErrorStream() const;
 
     /**
      * Get the RLE version.
@@ -235,10 +242,45 @@ namespace orc {
      * @param zone writer timezone name
      */
     WriterOptions& setTimezoneName(const std::string& zone);
+
+    /**
+     * Set the writer metrics.
+     */
+    WriterOptions& setWriterMetrics(WriterMetrics* metrics);
+
+    /**
+     * Get the writer metrics.
+     * @return if not set, return nullptr.
+     */
+    WriterMetrics* getWriterMetrics() const;
+
+    /**
+     * Set whether or not to use tight numeric vectorBatch.
+     */
+    WriterOptions& setUseTightNumericVector(bool useTightNumericVector);
+
+    /**
+     * Get whether or not to use tight numeric vectorBatch.
+     * @return if not set, the default is false
+     */
+    bool getUseTightNumericVector() const;
+
+    /**
+     * Set the initial capacity of output buffer in the class BufferedOutputStream.
+     * Each column contains one or more BufferedOutputStream depending on its type,
+     * and these buffers will automatically expand when more memory is required.
+     */
+    WriterOptions& setOutputBufferCapacity(uint64_t capacity);
+
+    /**
+     * Get the initial capacity of output buffer in the class BufferedOutputStream.
+     * @return if not set, return default value which is 1 MB.
+     */
+    uint64_t getOutputBufferCapacity() const;
   };
 
   class Writer {
-  public:
+   public:
     virtual ~Writer();
 
     /**
@@ -246,8 +288,7 @@ namespace orc {
      * @param size the number of rows to write.
      * @return a new ColumnVectorBatch to write into.
      */
-    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size
-                                                             ) const = 0;
+    virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) const = 0;
 
     /**
      * Add a row batch into current writer.
@@ -263,8 +304,15 @@ namespace orc {
     /**
      * Add user metadata to the writer.
      */
-    virtual void addUserMetadata(const std::string name, const std::string value) = 0;
+    virtual void addUserMetadata(const std::string& name, const std::string& value) = 0;
+
+    /**
+     * Write an intermediate footer on the file such that if the file is
+     * truncated to the returned offset, it would be a valid ORC file.
+     * @return the offset that would be a valid end location for an ORC file
+     */
+    virtual uint64_t writeIntermediateFooter() = 0;
   };
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
index b8fb9fbd4e..ab1e16fa15 100644
--- a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
@@ -1,7 +1,11 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -15,14 +19,9 @@
 #ifndef ORC_CONFIG_HH
 #define ORC_CONFIG_HH
 
-#define ORC_VERSION "1.8.0"
+#define ORC_VERSION "2.0.0"
 
 #define ORC_CXX_HAS_CSTDINT
-#define ORC_CXX_HAS_INITIALIZER_LIST
-#define ORC_CXX_HAS_NOEXCEPT
-#define ORC_CXX_HAS_NULLPTR
-#define ORC_CXX_HAS_OVERRIDE
-#define ORC_CXX_HAS_UNIQUE_PTR
 
 #ifdef ORC_CXX_HAS_CSTDINT
   #include <cstdint>
@@ -30,49 +29,10 @@
   #include <stdint.h>
 #endif
 
-#ifdef ORC_CXX_HAS_NOEXCEPT
-  #define ORC_NOEXCEPT noexcept
-#else
-  #define ORC_NOEXCEPT throw ()
-#endif
-
-#ifdef ORC_CXX_HAS_NULLPTR
-  #define ORC_NULLPTR nullptr
-#else
-  namespace orc {
-    class nullptr_t {
-    public:
-      template<class T>
-      operator T*() const {
-       return 0;
-      }
-
-      template<class C, class T>
-      operator T C::*() const {
-        return 0;
-      }
-    private:
-      void operator&() const;    // whose address can't be taken
-    };
-    const nullptr_t nullptr = {};
-  }
-  #define ORC_NULLPTR orc::nullptr
-#endif
-
-#ifdef ORC_CXX_HAS_OVERRIDE
-  #define ORC_OVERRIDE override
-#else
-  #define ORC_OVERRIDE
-#endif
-
-#ifdef ORC_CXX_HAS_UNIQUE_PTR
-  #define ORC_UNIQUE_PTR std::unique_ptr
-#else
-  #define ORC_UNIQUE_PTR std::auto_ptr
-  namespace std {
-    template<typename T>
-    inline T move(T& x) { return x; }
-  }
-#endif
+// The following macros should be kept for backward compatibility.
+#define ORC_NOEXCEPT noexcept
+#define ORC_NULLPTR nullptr
+#define ORC_OVERRIDE override
+#define ORC_UNIQUE_PTR std::unique_ptr
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/include/orc/sargs/Literal.hh b/contrib/libs/apache/orc/c++/include/orc/sargs/Literal.hh
index 36c9b37e3f..9ce958302d 100644
--- a/contrib/libs/apache/orc/c++/include/orc/sargs/Literal.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/sargs/Literal.hh
@@ -27,21 +27,19 @@ namespace orc {
   /**
    * Possible data types for predicates
    */
-  enum class PredicateDataType {
-    LONG = 0, FLOAT, STRING, DATE, DECIMAL, TIMESTAMP, BOOLEAN
-  };
+  enum class PredicateDataType { LONG = 0, FLOAT, STRING, DATE, DECIMAL, TIMESTAMP, BOOLEAN };
 
   /**
    * Represents a literal value in a predicate
    */
   class Literal {
-  public:
+   public:
     struct Timestamp {
       Timestamp() = default;
       Timestamp(const Timestamp&) = default;
       Timestamp(Timestamp&&) = default;
       ~Timestamp() = default;
-      Timestamp(int64_t second_, int32_t nanos_): second(second_), nanos(nanos_) {
+      Timestamp(int64_t second_, int32_t nanos_) : second(second_), nanos(nanos_) {
         // PASS
       }
       Timestamp& operator=(const Timestamp&) = default;
@@ -55,15 +53,23 @@ namespace orc {
       bool operator<=(const Timestamp& r) const {
         return second < r.second || (second == r.second && nanos <= r.nanos);
       }
-      bool operator!=(const Timestamp& r) const { return !(*this == r); }
-      bool operator>(const Timestamp& r) const { return r < *this; }
-      bool operator>=(const Timestamp& r) const { return r <= *this; }
-      int64_t getMillis() const { return second * 1000 + nanos / 1000000; }
+      bool operator!=(const Timestamp& r) const {
+        return !(*this == r);
+      }
+      bool operator>(const Timestamp& r) const {
+        return r < *this;
+      }
+      bool operator>=(const Timestamp& r) const {
+        return r <= *this;
+      }
+      int64_t getMillis() const {
+        return second * 1000 + nanos / 1000000;
+      }
       int64_t second;
       int32_t nanos;
     };
 
-    Literal(const Literal &r);
+    Literal(const Literal& r);
     ~Literal();
     Literal& operator=(const Literal& r);
     bool operator==(const Literal& r) const;
@@ -102,7 +108,7 @@ namespace orc {
     /**
      * Create a literal of STRING type
      */
-    Literal(const char * str, size_t size);
+    Literal(const char* str, size_t size);
 
     /**
      * Create a literal of DECIMAL type
@@ -123,38 +129,44 @@ namespace orc {
     /**
      * Check if a literal is null
      */
-    bool isNull() const { return mIsNull; }
+    bool isNull() const {
+      return mIsNull;
+    }
 
-    PredicateDataType getType() const { return mType; }
+    PredicateDataType getType() const {
+      return mType;
+    }
     std::string toString() const;
-    size_t getHashCode() const { return mHashCode; }
+    size_t getHashCode() const {
+      return mHashCode;
+    }
 
-  private:
+   private:
     size_t hashCode() const;
 
     union LiteralVal {
       int64_t IntVal;
       double DoubleVal;
       int64_t DateVal;
-      char * Buffer;
+      char* Buffer;
       Timestamp TimeStampVal;
       Int128 DecimalVal;
       bool BooleanVal;
 
       // explicitly define default constructor
-      LiteralVal(): DecimalVal(0) {}
+      LiteralVal() : DecimalVal(0) {}
     };
 
-  private:
-    LiteralVal mValue;       // data value for this literal if not null
-    PredicateDataType mType; // data type of the literal
-    size_t mSize;            // size of mValue if it is Buffer
-    int32_t mPrecision;      // precision of decimal type
-    int32_t mScale;          // scale of decimal type
-    bool mIsNull;            // whether this literal is null
-    size_t mHashCode;        // precomputed hash code for the literal
+   private:
+    LiteralVal mValue;        // data value for this literal if not null
+    PredicateDataType mType;  // data type of the literal
+    size_t mSize;             // size of mValue if it is Buffer
+    int32_t mPrecision;       // precision of decimal type
+    int32_t mScale;           // scale of decimal type
+    bool mIsNull;             // whether this literal is null
+    size_t mHashCode;         // precomputed hash code for the literal
   };
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_LITERAL_HH
+#endif  // ORC_LITERAL_HH
diff --git a/contrib/libs/apache/orc/c++/include/orc/sargs/SearchArgument.hh b/contrib/libs/apache/orc/c++/include/orc/sargs/SearchArgument.hh
index 44fde8f5e9..6493840a92 100644
--- a/contrib/libs/apache/orc/c++/include/orc/sargs/SearchArgument.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/sargs/SearchArgument.hh
@@ -34,7 +34,7 @@ namespace orc {
    * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
    */
   class SearchArgument {
-  public:
+   public:
     virtual ~SearchArgument();
 
     /**
@@ -52,7 +52,7 @@ namespace orc {
    * must call startOr, startAnd, or startNot before adding any leaves.
    */
   class SearchArgumentBuilder {
-  public:
+   public:
     virtual ~SearchArgumentBuilder();
 
     /**
@@ -87,8 +87,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& lessThan(const std::string& column,
-                                            PredicateDataType type,
+    virtual SearchArgumentBuilder& lessThan(const std::string& column, PredicateDataType type,
                                             Literal literal) = 0;
 
     /**
@@ -98,8 +97,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& lessThan(uint64_t columnId,
-                                            PredicateDataType type,
+    virtual SearchArgumentBuilder& lessThan(uint64_t columnId, PredicateDataType type,
                                             Literal literal) = 0;
 
     /**
@@ -109,8 +107,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& lessThanEquals(const std::string& column,
-                                                  PredicateDataType type,
+    virtual SearchArgumentBuilder& lessThanEquals(const std::string& column, PredicateDataType type,
                                                   Literal literal) = 0;
 
     /**
@@ -120,8 +117,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& lessThanEquals(uint64_t columnId,
-                                                  PredicateDataType type,
+    virtual SearchArgumentBuilder& lessThanEquals(uint64_t columnId, PredicateDataType type,
                                                   Literal literal) = 0;
 
     /**
@@ -131,8 +127,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& equals(const std::string& column,
-                                          PredicateDataType type,
+    virtual SearchArgumentBuilder& equals(const std::string& column, PredicateDataType type,
                                           Literal literal) = 0;
 
     /**
@@ -142,8 +137,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& equals(uint64_t columnId,
-                                          PredicateDataType type,
+    virtual SearchArgumentBuilder& equals(uint64_t columnId, PredicateDataType type,
                                           Literal literal) = 0;
 
     /**
@@ -153,8 +147,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& nullSafeEquals(const std::string& column,
-                                                  PredicateDataType type,
+    virtual SearchArgumentBuilder& nullSafeEquals(const std::string& column, PredicateDataType type,
                                                   Literal literal) = 0;
 
     /**
@@ -164,8 +157,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& nullSafeEquals(uint64_t columnId,
-                                                  PredicateDataType type,
+    virtual SearchArgumentBuilder& nullSafeEquals(uint64_t columnId, PredicateDataType type,
                                                   Literal literal) = 0;
 
     /**
@@ -175,8 +167,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    virtual SearchArgumentBuilder& in(const std::string& column,
-                                      PredicateDataType type,
+    virtual SearchArgumentBuilder& in(const std::string& column, PredicateDataType type,
                                       const std::initializer_list<Literal>& literals) = 0;
 
     /**
@@ -186,8 +177,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    virtual SearchArgumentBuilder& in(uint64_t columnId,
-                                      PredicateDataType type,
+    virtual SearchArgumentBuilder& in(uint64_t columnId, PredicateDataType type,
                                       const std::initializer_list<Literal>& literals) = 0;
 
     /**
@@ -197,8 +187,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    virtual SearchArgumentBuilder& in(const std::string& column,
-                                      PredicateDataType type,
+    virtual SearchArgumentBuilder& in(const std::string& column, PredicateDataType type,
                                       const std::vector<Literal>& literals) = 0;
 
     /**
@@ -208,8 +197,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    virtual SearchArgumentBuilder& in(uint64_t columnId,
-                                      PredicateDataType type,
+    virtual SearchArgumentBuilder& in(uint64_t columnId, PredicateDataType type,
                                       const std::vector<Literal>& literals) = 0;
 
     /**
@@ -218,8 +206,7 @@ namespace orc {
      * @param type the type of the expression
      * @return this
      */
-    virtual SearchArgumentBuilder& isNull(const std::string& column,
-                                          PredicateDataType type) = 0;
+    virtual SearchArgumentBuilder& isNull(const std::string& column, PredicateDataType type) = 0;
 
     /**
      * Add an is null leaf to the current item on the stack.
@@ -227,8 +214,7 @@ namespace orc {
      * @param type the type of the expression
      * @return this
      */
-    virtual SearchArgumentBuilder& isNull(uint64_t columnId,
-                                          PredicateDataType type) = 0;
+    virtual SearchArgumentBuilder& isNull(uint64_t columnId, PredicateDataType type) = 0;
 
     /**
      * Add a between leaf to the current item on the stack.
@@ -238,10 +224,8 @@ namespace orc {
      * @param upper the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& between(const std::string& column,
-                                           PredicateDataType type,
-                                           Literal lower,
-                                           Literal upper) = 0;
+    virtual SearchArgumentBuilder& between(const std::string& column, PredicateDataType type,
+                                           Literal lower, Literal upper) = 0;
 
     /**
      * Add a between leaf to the current item on the stack.
@@ -251,9 +235,7 @@ namespace orc {
      * @param upper the literal
      * @return this
      */
-    virtual SearchArgumentBuilder& between(uint64_t columnId,
-                                           PredicateDataType type,
-                                           Literal lower,
+    virtual SearchArgumentBuilder& between(uint64_t columnId, PredicateDataType type, Literal lower,
                                            Literal upper) = 0;
 
     /**
@@ -275,10 +257,10 @@ namespace orc {
    * Factory to create SearchArgumentBuilder which builds SearchArgument
    */
   class SearchArgumentFactory {
-  public:
+   public:
     static std::unique_ptr<SearchArgumentBuilder> newBuilder();
   };
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_SEARCHARGUMENT_HH
+#endif  // ORC_SEARCHARGUMENT_HH
diff --git a/contrib/libs/apache/orc/c++/include/orc/sargs/TruthValue.hh b/contrib/libs/apache/orc/c++/include/orc/sargs/TruthValue.hh
index b3ea6b76ce..fa3dce06f8 100644
--- a/contrib/libs/apache/orc/c++/include/orc/sargs/TruthValue.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/sargs/TruthValue.hh
@@ -25,13 +25,13 @@ namespace orc {
    * The potential result sets of logical operations.
    */
   enum class TruthValue {
-      YES,        // all rows satisfy the predicate
-      NO,         // all rows dissatisfy the predicate
-      IS_NULL,    // all rows are null value
-      YES_NULL,   // null values exist, not-null rows satisfy the predicate
-      NO_NULL,    // null values exist, not-null rows dissatisfy the predicate
-      YES_NO,     // some rows satisfy the predicate and the others not
-      YES_NO_NULL // null values exist, some rows satisfy predicate and some not
+    YES,         // all rows satisfy the predicate
+    NO,          // all rows dissatisfy the predicate
+    IS_NULL,     // all rows are null value
+    YES_NULL,    // null values exist, not-null rows satisfy the predicate
+    NO_NULL,     // null values exist, not-null rows dissatisfy the predicate
+    YES_NO,      // some rows satisfy the predicate and the others not
+    YES_NO_NULL  // null values exist, some rows satisfy predicate and some not
   };
 
   // Compute logical or between the two values.
@@ -46,6 +46,6 @@ namespace orc {
   // Do we need to read the data based on the TruthValue?
   bool isNeeded(TruthValue val);
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_TRUTHVALUE_HH
+#endif  // ORC_TRUTHVALUE_HH
diff --git a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
index 625c1befb2..b11cdf74cd 100644
--- a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
+++ b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
@@ -19,20 +19,15 @@
 #ifndef ADAPTER_HH
 #define ADAPTER_HH
 
-/* #undef INT64_IS_LL */
-#define HAS_CONSTEXPR
 #define HAS_PREAD
 #define HAS_STRPTIME
-#define HAS_STOLL
 #define HAS_DIAGNOSTIC_PUSH
 #define HAS_DOUBLE_TO_STRING
 #define HAS_INT64_TO_STRING
 #define HAS_PRE_1970
 #define HAS_POST_2038
 #define HAS_STD_ISNAN
-#define HAS_STD_MUTEX
 #define HAS_BUILTIN_OVERFLOW_CHECK
-/* #undef NEEDS_REDUNDANT_MOVE */
 /* #undef NEEDS_Z_PREFIX */
 
 #include "orc/orc-config.hh"
@@ -46,13 +41,6 @@ typedef SSIZE_T ssize_t;
 #define asctime_r(tm, buf) (asctime_s(buf, 26, tm) ? NULL : buf)
 #endif
 
-#ifndef HAS_STOLL
-  // A poor man's stoll that converts str to a long long int base 10
-  namespace std {
-    int64_t stoll(std::string str);
-  }
-#endif
-
 #ifndef HAS_STRPTIME
   char* strptime(const char* buf, const char* format, struct tm* tm);
 #endif
@@ -61,20 +49,6 @@ typedef SSIZE_T ssize_t;
   ssize_t pread(int fd, void* buf, size_t count, off_t offset);
 #endif
 
-#ifdef INT64_IS_LL
-  #define INT64_FORMAT_STRING "ll"
-#else
-  #define INT64_FORMAT_STRING "l"
-#endif
-
-#ifndef ORC_CXX_HAS_NOEXCEPT
-  #define noexcept ORC_NOEXCEPT
-#endif
-
-#ifndef ORC_CXX_HAS_OVERRIDE
-  #define override ORC_OVERRIDE
-#endif
-
 #ifdef HAS_DIAGNOSTIC_PUSH
   #ifdef __clang__
     #define DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
@@ -105,10 +79,6 @@ typedef SSIZE_T ssize_t;
   #define DIAGNOSTIC_IGNORE(XXX)
 #endif
 
-#ifndef ORC_CXX_HAS_UNIQUE_PTR
-  #define unique_ptr auto_ptr
-#endif
-
 #ifndef UINT32_MAX
   #define UINT32_MAX 0xffffffff
 #endif
@@ -123,12 +93,6 @@ typedef SSIZE_T ssize_t;
 
 #define GTEST_LANG_CXX11 0
 
-#ifdef NEEDS_REDUNDANT_MOVE
-  #define REDUNDANT_MOVE(XXX) std::move(XXX)
-#else
-  #define REDUNDANT_MOVE(XXX) XXX
-#endif
-
 #ifndef HAS_STD_ISNAN
   #include <math.h>
   #define std::isnan(XXX) isnan(XXX)
@@ -136,34 +100,7 @@ typedef SSIZE_T ssize_t;
   #include <cmath>
 #endif
 
-#ifndef HAS_STD_MUTEX
-  #include <pthread.h>
-  namespace orc {
-    /**
-     * Lock guard for pthread_mutex_t object using RAII
-     * The Lock is automatically release when exiting current scope.
-     */
-    class LockORC {
-      public:
-        explicit LockORC(pthread_mutex_t& mutex) : mutex_ref_(mutex) {
-          pthread_mutex_lock(&mutex_ref_);
-        }
-        ~LockORC() { pthread_mutex_unlock(&mutex_ref_); }
-      private:
-        // no default constructor
-        LockORC();
-        // prohibit copying
-        LockORC(const LockORC&);
-        LockORC& operator=(const LockORC&);
-
-        pthread_mutex_t& mutex_ref_;
-    };
-  }
-  #define std::mutex pthread_mutex_t
-  #define std::lock_guard<std::mutex> LockORC
-#else
-  #include <mutex>
-#endif
+#include <mutex>
 
 #ifdef NEEDS_Z_PREFIX
 #define Z_PREFIX 1
@@ -208,8 +145,4 @@ namespace orc {
 }
 #endif
 
-#ifndef HAS_CONSTEXPR
-#define constexpr const
-#endif
-
 #endif /* ADAPTER_HH */
diff --git a/contrib/libs/apache/orc/c++/src/Adaptor.cc b/contrib/libs/apache/orc/c++/src/Adaptor.cc
index bf3a3e181b..d9390131b6 100644
--- a/contrib/libs/apache/orc/c++/src/Adaptor.cc
+++ b/contrib/libs/apache/orc/c++/src/Adaptor.cc
@@ -1,36 +1,24 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "Adaptor.hh"
-#include <sstream>
 #include <iomanip>
-
-#ifndef HAS_STOLL
-namespace std {
-  int64_t std::stoll(std::string str) {
-    int64_t val = 0;
-    stringstream ss;
-    ss << str;
-    ss >> val;
-    return val;
-  }
-}
-#endif
+#include <sstream>
 
 #ifndef HAS_STRPTIME
 char* strptime(const char* s, const char* f, struct tm* tm) {
@@ -43,7 +31,7 @@ char* strptime(const char* s, const char* f, struct tm* tm) {
 #endif
 
 #ifndef HAS_PREAD
-  #ifdef _WIN32
+#ifdef _WIN32
 #include <Windows.h>
 #include <io.h>
 ssize_t pread(int fd, void* buf, size_t size, off_t offset) {
@@ -60,9 +48,9 @@ ssize_t pread(int fd, void* buf, size_t size, off_t offset) {
   }
   return static_cast<ssize_t>(rt);
 }
-  #else
-    #error("pread() undefined: unknown environment")
-  #endif
+#else
+#error("pread() undefined: unknown environment")
+#endif
 #endif
 
 namespace orc {
@@ -85,4 +73,4 @@ namespace orc {
     return std::to_string(static_cast<long long int>(val));
   }
 #endif
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/BlockBuffer.cc b/contrib/libs/apache/orc/c++/src/BlockBuffer.cc
new file mode 100644
index 0000000000..1f7843fad7
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/BlockBuffer.cc
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BlockBuffer.hh"
+#include "orc/OrcFile.hh"
+#include "orc/Writer.hh"
+
+#include <algorithm>
+
+namespace orc {
+
+  BlockBuffer::BlockBuffer(MemoryPool& pool, uint64_t _blockSize)
+      : memoryPool(pool), currentSize(0), currentCapacity(0), blockSize(_blockSize) {
+    if (blockSize == 0) {
+      throw std::logic_error("Block size cannot be zero");
+    }
+    reserve(blockSize);
+  }
+
+  BlockBuffer::~BlockBuffer() {
+    for (size_t i = 0; i < blocks.size(); ++i) {
+      memoryPool.free(blocks[i]);
+    }
+    blocks.clear();
+    currentSize = currentCapacity = 0;
+  }
+
+  BlockBuffer::Block BlockBuffer::getBlock(uint64_t blockIndex) const {
+    if (blockIndex >= getBlockNumber()) {
+      throw std::out_of_range("Block index out of range");
+    }
+    return Block(blocks[blockIndex], std::min(currentSize - blockIndex * blockSize, blockSize));
+  }
+
+  BlockBuffer::Block BlockBuffer::getNextBlock() {
+    if (currentSize < currentCapacity) {
+      Block emptyBlock(blocks[currentSize / blockSize] + currentSize % blockSize,
+                       blockSize - currentSize % blockSize);
+      currentSize = (currentSize / blockSize + 1) * blockSize;
+      return emptyBlock;
+    } else {
+      resize(currentSize + blockSize);
+      return Block(blocks.back(), blockSize);
+    }
+  }
+
+  void BlockBuffer::resize(uint64_t size) {
+    reserve(size);
+    if (currentCapacity >= size) {
+      currentSize = size;
+    } else {
+      throw std::logic_error("Block buffer resize error");
+    }
+  }
+
+  void BlockBuffer::reserve(uint64_t newCapacity) {
+    while (currentCapacity < newCapacity) {
+      char* newBlockPtr = memoryPool.malloc(blockSize);
+      if (newBlockPtr != nullptr) {
+        blocks.push_back(newBlockPtr);
+        currentCapacity += blockSize;
+      } else {
+        break;
+      }
+    }
+  }
+
+  void BlockBuffer::writeTo(OutputStream* output, WriterMetrics* metrics) {
+    if (currentSize == 0) {
+      return;
+    }
+    static uint64_t MAX_CHUNK_SIZE = 1024 * 1024 * 1024;
+    uint64_t chunkSize = std::min(output->getNaturalWriteSize(), MAX_CHUNK_SIZE);
+    if (chunkSize == 0) {
+      throw std::logic_error("Natural write size cannot be zero");
+    }
+    uint64_t ioCount = 0;
+    uint64_t blockNumber = getBlockNumber();
+    // if there is only one block, currentSize is equal to the first block's size
+    if (blockNumber == 1 && currentSize <= chunkSize) {
+      Block block = getBlock(0);
+      output->write(block.data, block.size);
+      ++ioCount;
+    } else {
+      char* chunk = memoryPool.malloc(chunkSize);
+      uint64_t chunkOffset = 0;
+      for (uint64_t i = 0; i < blockNumber; ++i) {
+        Block block = getBlock(i);
+        uint64_t blockOffset = 0;
+        while (blockOffset < block.size) {
+          // copy current block into chunk
+          uint64_t copySize = std::min(chunkSize - chunkOffset, block.size - blockOffset);
+          memcpy(chunk + chunkOffset, block.data + blockOffset, copySize);
+          chunkOffset += copySize;
+          blockOffset += copySize;
+
+          // chunk is full
+          if (chunkOffset >= chunkSize) {
+            output->write(chunk, chunkSize);
+            chunkOffset = 0;
+            ++ioCount;
+          }
+        }
+      }
+      if (chunkOffset != 0) {
+        output->write(chunk, chunkOffset);
+        ++ioCount;
+      }
+      memoryPool.free(chunk);
+    }
+
+    if (metrics != nullptr) {
+      metrics->IOCount.fetch_add(ioCount);
+    }
+  }
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/BlockBuffer.hh b/contrib/libs/apache/orc/c++/src/BlockBuffer.hh
new file mode 100644
index 0000000000..0f5f78e3fe
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/BlockBuffer.hh
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_BLOCK_BUFFER_HH
+#define ORC_BLOCK_BUFFER_HH
+
+#include "orc/MemoryPool.hh"
+
+#include <vector>
+
+namespace orc {
+
+  class OutputStream;
+  struct WriterMetrics;
+  /**
+   * BlockBuffer implements a memory allocation policy based on
+   * equal-length blocks. BlockBuffer will reserve multiple blocks
+   * for allocation.
+   *
+   * Instances can be neither copied nor moved.
+   */
+  class BlockBuffer {
+   private:
+    MemoryPool& memoryPool;
+    // current buffer size
+    uint64_t currentSize;
+    // maximal capacity (actual allocated memory)
+    uint64_t currentCapacity;
+    // unit for buffer expansion
+    const uint64_t blockSize;
+    // pointers to the start of each block
+    std::vector<char*> blocks;
+
+    // non-copyable and non-movable
+    BlockBuffer(const BlockBuffer& buffer) = delete;
+    BlockBuffer& operator=(const BlockBuffer& buffer) = delete;
+    BlockBuffer(BlockBuffer&& buffer) = delete;
+    BlockBuffer& operator=(BlockBuffer&& buffer) = delete;
+
+   public:
+    /**
+     * @param pool the memory pool kept by reference in this buffer
+     * @param blockSize unit for buffer expansion
+     */
+    BlockBuffer(MemoryPool& pool, uint64_t blockSize);
+
+    ~BlockBuffer();
+
+    /**
+     * Block points to a section of memory allocated by BlockBuffer,
+     * containing the corresponding physical memory address and available size.
+     */
+    struct Block {
+      // the start of block
+      char* data;
+      // number of bytes available at data
+      uint64_t size;
+
+      Block() : data(nullptr), size(0) {}
+      Block(char* _data, uint64_t _size) : data(_data), size(_size) {}
+      Block(const Block& block) = default;
+      ~Block() = default;
+    };
+
+    /**
+     * Get the allocated block object.
+     * The last allocated block size may be less than blockSize,
+     * and the rest of the blocks are all of size blockSize.
+     * @param blockIndex the index of blocks
+     * @return the allocated block object
+     */
+    Block getBlock(uint64_t blockIndex) const;
+
+    /**
+     * Get an empty block or allocate a new block to write.
+     * If the last allocated block size is less than blockSize,
+     * the size of the empty block is equal to blockSize minus the size of
+     * the last allocated block. Otherwise, the size of
+     * the empty block is equal to blockSize.
+     * @return an empty block object
+     */
+    Block getNextBlock();
+
+    /**
+     * Get the number of blocks that are fully or partially occupied,
+     * i.e. currentSize divided by blockSize, rounded up.
+     */
+    uint64_t getBlockNumber() const {
+      return (currentSize + blockSize - 1) / blockSize;
+    }
+
+    /**
+     * @return the current buffer size in bytes
+     */
+    uint64_t size() const {
+      return currentSize;
+    }
+
+    /**
+     * @return the current capacity (actual allocated memory) in bytes
+     */
+    uint64_t capacity() const {
+      return currentCapacity;
+    }
+
+    /**
+     * Change the buffer size to `size` bytes.
+     * NOTE(review): the definition lives in BlockBuffer.cc; whether this
+     * grows the capacity when `size` exceeds it should be confirmed there.
+     * @param size new size of BlockBuffer
+     */
+    void resize(uint64_t size);
+    /**
+     * Requests the BlockBuffer to contain at least newCapacity bytes.
+     * Reallocation happens if there is need of more space.
+     * @param newCapacity new capacity of BlockBuffer
+     */
+    void reserve(uint64_t newCapacity);
+    /**
+     * Write the BlockBuffer content into OutputStream
+     * @param output the output stream to write to
+     * @param metrics the metrics of the writer
+     */
+    void writeTo(OutputStream* output, WriterMetrics* metrics);
+  };
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/BloomFilter.cc b/contrib/libs/apache/orc/c++/src/BloomFilter.cc
index 8a1f1880e7..882c6f4252 100644
--- a/contrib/libs/apache/orc/c++/src/BloomFilter.cc
+++ b/contrib/libs/apache/orc/c++/src/BloomFilter.cc
@@ -22,11 +22,14 @@
 namespace orc {
 
   constexpr uint64_t BITS_OF_LONG = 64;
-  constexpr uint8_t  SHIFT_6_BITS = 6;
-  constexpr uint8_t  SHIFT_3_BITS = 3;
+  constexpr uint8_t SHIFT_6_BITS = 6;
+  constexpr uint8_t SHIFT_3_BITS = 3;
 
   static bool isLittleEndian() {
-    static union { uint32_t i; char c[4]; } num = { 0x01020304 };
+    static union {
+      uint32_t i;
+      char c[4];
+    } num = {0x01020304};
     return num.c[0] == 4;
   }
 
@@ -34,11 +37,10 @@ namespace orc {
    * Implementation of BitSet
    */
   BitSet::BitSet(uint64_t numBits) {
-    mData.resize(static_cast<size_t>(ceil(
-      static_cast<double>(numBits) / BITS_OF_LONG)), 0);
+    mData.resize(static_cast<size_t>(ceil(static_cast<double>(numBits) / BITS_OF_LONG)), 0);
   }
 
-  BitSet::BitSet(const uint64_t * bits, uint64_t numBits) {
+  BitSet::BitSet(const uint64_t* bits, uint64_t numBits) {
     // caller should make sure numBits is multiple of 64
     mData.resize(numBits >> SHIFT_6_BITS, 0);
     memcpy(mData.data(), bits, numBits >> SHIFT_3_BITS);
@@ -59,8 +61,8 @@ namespace orc {
   void BitSet::merge(const BitSet& other) {
     if (mData.size() != other.mData.size()) {
       std::stringstream ss;
-      ss << "BitSet must be of equal length ("
-         << mData.size() << " != " << other.mData.size() << ")";
+      ss << "BitSet must be of equal length (" << mData.size() << " != " << other.mData.size()
+         << ")";
       throw std::logic_error(ss.str());
     }
 
@@ -73,7 +75,7 @@ namespace orc {
     memset(mData.data(), 0, sizeof(uint64_t) * mData.size());
   }
 
-  const uint64_t * BitSet::getData() const {
+  const uint64_t* BitSet::getData() const {
     return mData.data();
   }
 
@@ -92,8 +94,8 @@ namespace orc {
 
   int32_t optimalNumOfHashFunctions(uint64_t expectedEntries, uint64_t numBits) {
     double n = static_cast<double>(expectedEntries);
-    return std::max<int32_t>(1, static_cast<int32_t>(
-      std::round(static_cast<double>(numBits) / n * std::log(2.0))));
+    return std::max<int32_t>(
+        1, static_cast<int32_t>(std::round(static_cast<double>(numBits) / n * std::log(2.0))));
   }
 
   int32_t optimalNumOfBits(uint64_t expectedEntries, double fpp) {
@@ -108,23 +110,20 @@ namespace orc {
   // probability'
   // Lets split up 64-bit hashcode into two 32-bit hash codes and employ
   // the technique mentioned in the above paper
-  inline uint64_t getBytesHash(const char * data, int64_t length) {
+  inline uint64_t getBytesHash(const char* data, int64_t length) {
     if (data == nullptr) {
       return Murmur3::NULL_HASHCODE;
     }
 
-    return Murmur3::hash64(reinterpret_cast<const uint8_t *>(data),
-                           static_cast<uint32_t>(length));
+    return Murmur3::hash64(reinterpret_cast<const uint8_t*>(data), static_cast<uint32_t>(length));
   }
 
   /**
    * Implementation of BloomFilter
    */
   BloomFilterImpl::BloomFilterImpl(uint64_t expectedEntries, double fpp) {
-    checkArgument(expectedEntries > 0,
-                  "expectedEntries should be > 0");
-    checkArgument(fpp > 0.0 && fpp < 1.0,
-                  "False positive probability should be > 0.0 & < 1.0");
+    checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
+    checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
 
     uint64_t nb = static_cast<uint64_t>(optimalNumOfBits(expectedEntries, fpp));
     // make 'mNumBits' multiple of 64
@@ -133,7 +132,7 @@ namespace orc {
     mBitSet.reset(new BitSet(mNumBits));
   }
 
-  void BloomFilterImpl::addBytes(const char * data, int64_t length) {
+  void BloomFilterImpl::addBytes(const char* data, int64_t length) {
     uint64_t hash64 = getBytesHash(data, length);
     addHash(static_cast<int64_t>(hash64));
   }
@@ -142,7 +141,7 @@ namespace orc {
     addHash(getLongHash(data));
   }
 
-  bool BloomFilterImpl::testBytes(const char * data, int64_t length) const {
+  bool BloomFilterImpl::testBytes(const char* data, int64_t length) const {
     uint64_t hash64 = getBytesHash(data, length);
     return testHash(static_cast<int64_t>(hash64));
   }
@@ -176,13 +175,13 @@ namespace orc {
   // caller should make sure input proto::BloomFilter is valid since
   // no check will be performed in the following constructor
   BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) {
-    mNumHashFunctions = static_cast<int32_t>(bloomFilter.numhashfunctions());
+    mNumHashFunctions = static_cast<int32_t>(bloomFilter.num_hash_functions());
 
     const std::string& bitsetStr = bloomFilter.utf8bitset();
     mNumBits = bitsetStr.size() << SHIFT_3_BITS;
     checkArgument(mNumBits % BITS_OF_LONG == 0, "numBits should be multiple of 64!");
 
-    const uint64_t * bitset = reinterpret_cast<const uint64_t *>(bitsetStr.data());
+    const uint64_t* bitset = reinterpret_cast<const uint64_t*>(bitsetStr.data());
     if (isLittleEndian()) {
       mBitSet.reset(new BitSet(bitset, mNumBits));
     } else {
@@ -204,7 +203,7 @@ namespace orc {
     addLong(reinterpret_cast<int64_t&>(data));
   }
 
-  bool BloomFilterImpl::testDouble(double data) const{
+  bool BloomFilterImpl::testDouble(double data) const {
     return testLong(reinterpret_cast<int64_t&>(data));
   }
 
@@ -227,7 +226,7 @@ namespace orc {
     }
   }
 
-  bool BloomFilterImpl::testHash(int64_t hash64) const{
+  bool BloomFilterImpl::testHash(int64_t hash64) const {
     int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
     // In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
     // So we cast hash64 to uint64_t here for an unsigned right shift.
@@ -251,10 +250,8 @@ namespace orc {
     if (mNumBits != other.mNumBits || mNumHashFunctions != other.mNumHashFunctions) {
       std::stringstream ss;
       ss << "BloomFilters are not compatible for merging: "
-         << "this: numBits:" << mNumBits
-         << ",numHashFunctions:" << mNumHashFunctions
-         << ", that: numBits:" << other.mNumBits
-         << ",numHashFunctions:" << other.mNumHashFunctions;
+         << "this: numBits:" << mNumBits << ",numHashFunctions:" << mNumHashFunctions
+         << ", that: numBits:" << other.mNumBits << ",numHashFunctions:" << other.mNumHashFunctions;
       throw std::logic_error(ss.str());
     }
 
@@ -266,17 +263,17 @@ namespace orc {
   }
 
   void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const {
-    bloomFilter.set_numhashfunctions(static_cast<uint32_t>(mNumHashFunctions));
+    bloomFilter.set_num_hash_functions(static_cast<uint32_t>(mNumHashFunctions));
 
     // According to ORC standard, the encoding is a sequence of bytes with
     // a little endian encoding in the utf8bitset field.
     if (isLittleEndian()) {
       // bytes are already organized in little endian; thus no conversion needed
-      const char * bitset = reinterpret_cast<const char *>(mBitSet->getData());
+      const char* bitset = reinterpret_cast<const char*>(mBitSet->getData());
       bloomFilter.set_utf8bitset(bitset, sizeInBytes());
     } else {
       std::vector<uint64_t> bitset(sizeInBytes() / sizeof(uint64_t), 0);
-      const uint64_t * longs = mBitSet->getData();
+      const uint64_t* longs = mBitSet->getData();
       for (size_t i = 0; i != bitset.size(); ++i) {
         uint64_t& dst = bitset[i];
         const uint64_t src = longs[i];
@@ -290,8 +287,7 @@ namespace orc {
   }
 
   bool BloomFilterImpl::operator==(const BloomFilterImpl& other) const {
-    return mNumBits == other.mNumBits &&
-           mNumHashFunctions == other.mNumHashFunctions &&
+    return mNumBits == other.mNumBits && mNumHashFunctions == other.mNumHashFunctions &&
            *mBitSet == *other.mBitSet;
   }
 
@@ -300,29 +296,24 @@ namespace orc {
   }
 
   std::unique_ptr<BloomFilter> BloomFilterUTF8Utils::deserialize(
-    const proto::Stream_Kind& streamKind,
-    const proto::ColumnEncoding& encoding,
-    const proto::BloomFilter& bloomFilter) {
-
-    std::unique_ptr<BloomFilter> ret(nullptr);
-
+      const proto::Stream_Kind& streamKind, const proto::ColumnEncoding& encoding,
+      const proto::BloomFilter& bloomFilter) {
     // only BLOOM_FILTER_UTF8 is supported
     if (streamKind != proto::Stream_Kind_BLOOM_FILTER_UTF8) {
-      return ret;
+      return nullptr;
     }
 
     // make sure we don't use unknown encodings or original timestamp encodings
-    if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) {
-      return ret;
+    if (!encoding.has_bloom_encoding() || encoding.bloom_encoding() != 1) {
+      return nullptr;
     }
 
     // make sure all required fields exist
-    if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) {
-      return ret;
+    if (!bloomFilter.has_num_hash_functions() || !bloomFilter.has_utf8bitset()) {
+      return nullptr;
     }
 
-    ret.reset(new BloomFilterImpl(bloomFilter));
-    return ret;
+    return std::make_unique<BloomFilterImpl>(bloomFilter);
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/BloomFilter.hh b/contrib/libs/apache/orc/c++/src/BloomFilter.hh
index cf18a46fd9..d72961a83c 100644
--- a/contrib/libs/apache/orc/c++/src/BloomFilter.hh
+++ b/contrib/libs/apache/orc/c++/src/BloomFilter.hh
@@ -33,7 +33,7 @@ namespace orc {
    * for index bounds nor expand the bit set size if the specified index is greater than the size.
    */
   class BitSet {
-  public:
+   public:
     /**
      * Creates an empty BitSet
      *
@@ -47,7 +47,7 @@ namespace orc {
      * @param bits - serialized uint64_t buffer of bitset
      * @param numBits - number of bits used
      */
-    BitSet(const uint64_t * bits, uint64_t numBits);
+    BitSet(const uint64_t* bits, uint64_t numBits);
 
     /**
      * Sets the bit at specified index.
@@ -82,14 +82,14 @@ namespace orc {
     /**
      * Gets underlying raw data
      */
-    const uint64_t * getData() const;
+    const uint64_t* getData() const;
 
     /**
      * Compares two BitSets
      */
     bool operator==(const BitSet& other) const;
 
-  private:
+   private:
     std::vector<uint64_t> mData;
   };
 
@@ -120,14 +120,14 @@ namespace orc {
    * BloomFilterUtf8, which always uses UTF8 for the encoding.
    */
   class BloomFilterImpl : public BloomFilter {
-  public:
+   public:
     /**
      * Creates an empty BloomFilter
      *
      * @param expectedEntries - number of entries it will hold
      * @param fpp - false positive probability
      */
-    BloomFilterImpl(uint64_t expectedEntries, double fpp=DEFAULT_FPP);
+    BloomFilterImpl(uint64_t expectedEntries, double fpp = DEFAULT_FPP);
 
     /**
      * Creates a BloomFilter by deserializing the proto-buf version
@@ -139,14 +139,14 @@ namespace orc {
     /**
      * Adds a new element to the BloomFilter
      */
-    void addBytes(const char * data, int64_t length);
+    void addBytes(const char* data, int64_t length);
     void addLong(int64_t data);
     void addDouble(double data);
 
     /**
      * Test if the element exists in BloomFilter
      */
-    bool testBytes(const char * data, int64_t length) const override;
+    bool testBytes(const char* data, int64_t length) const override;
     bool testLong(int64_t data) const override;
     bool testDouble(double data) const override;
 
@@ -160,7 +160,7 @@ namespace orc {
 
     bool operator==(const BloomFilterImpl& other) const;
 
-  private:
+   private:
     friend struct BloomFilterUTF8Utils;
     friend class TestBloomFilter_testBloomFilterBasicOperations_Test;
 
@@ -172,7 +172,7 @@ namespace orc {
 
     void serialize(proto::BloomFilter& bloomFilter) const;
 
-  private:
+   private:
     static constexpr double DEFAULT_FPP = 0.05;
     uint64_t mNumBits;
     int32_t mNumHashFunctions;
@@ -186,25 +186,24 @@ namespace orc {
     }
 
     // deserialize BloomFilter from protobuf
-    static std::unique_ptr<BloomFilter>
-    deserialize(const proto::Stream_Kind& streamKind,
-                const proto::ColumnEncoding& columnEncoding,
-                const proto::BloomFilter& bloomFilter);
+    static std::unique_ptr<BloomFilter> deserialize(const proto::Stream_Kind& streamKind,
+                                                    const proto::ColumnEncoding& columnEncoding,
+                                                    const proto::BloomFilter& bloomFilter);
   };
 
   // Thomas Wang's integer hash function
   // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
   // Put this in header file so tests can use it as well.
   inline int64_t getLongHash(int64_t key) {
-    key = (~key) + (key << 21); // key = (key << 21) - key - 1;
+    key = (~key) + (key << 21);  // key = (key << 21) - key - 1;
     key = key ^ (key >> 24);
-    key = (key + (key << 3)) + (key << 8); // key * 265
+    key = (key + (key << 3)) + (key << 8);  // key * 265
     key = key ^ (key >> 14);
-    key = (key + (key << 2)) + (key << 4); // key * 21
+    key = (key + (key << 2)) + (key << 4);  // key * 21
     key = key ^ (key >> 28);
     key = key + (key << 31);
     return key;
   }
-}
+}  // namespace orc
 
-#endif //ORC_BLOOMFILTER_IMPL_HH
+#endif  // ORC_BLOOMFILTER_IMPL_HH
diff --git a/contrib/libs/apache/orc/c++/src/Bpacking.hh b/contrib/libs/apache/orc/c++/src/Bpacking.hh
new file mode 100644
index 0000000000..f55e986d8d
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/Bpacking.hh
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_BPACKING_HH
+#define ORC_BPACKING_HH
+
+#include <cstdint>
+
+namespace orc {
+  class RleDecoderV2;
+
+  /**
+   * Entry point for unpacking bit-packed integers read through an
+   * RleDecoderV2.
+   *
+   * NOTE(review): readLongs() is only declared here. The parameter
+   * descriptions below mirror BitUnpackDefault::readLongs and should be
+   * confirmed against the definition of this function.
+   */
+  class BitUnpack {
+   public:
+    /**
+     * @param decoder the decoder supplying the packed input bytes
+     * @param data destination array
+     * @param offset index in `data` of the first value to write
+     * @param len number of values to unpack
+     * @param fbs width of each packed value, in bits
+     */
+    static void readLongs(RleDecoderV2* decoder,
+                          int64_t* data,
+                          uint64_t offset,
+                          uint64_t len,
+                          uint64_t fbs);
+  };
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/BpackingDefault.cc b/contrib/libs/apache/orc/c++/src/BpackingDefault.cc
new file mode 100644
index 0000000000..5a80bc6fb1
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/BpackingDefault.cc
@@ -0,0 +1,368 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BpackingDefault.hh"
+#include "RLEv2.hh"
+#include "Utils.hh"
+
+namespace orc {
+
+  // Remembers the decoder to read from; the pointer is stored as-is and is
+  // not released by this class.
+  UnpackDefault::UnpackDefault(RleDecoderV2* dec) : decoder(dec) {}
+
+  // Nothing to clean up: the decoder pointer is left untouched.
+  UnpackDefault::~UnpackDefault() = default;
+
+  /**
+   * Unpack `len` 4-bit values into data[offset, offset + len). Within each
+   * input byte the high nibble is emitted before the low nibble.
+   */
+  void UnpackDefault::unrolledUnpack4(int64_t* data, uint64_t offset, uint64_t len) {
+    const uint64_t endIdx = offset + len;
+    uint64_t outIdx = offset;
+    while (outIdx < endIdx) {
+      // First drain whatever is still pending in the decoder's current byte
+      // (bitsLeft is expected to be 0, 4 or 8 here).
+      for (; decoder->getBitsLeft() > 0 && outIdx < endIdx; ++outIdx) {
+        decoder->setBitsLeft(decoder->getBitsLeft() - 4);
+        data[outIdx] = (decoder->getCurByte() >> decoder->getBitsLeft()) & 15;
+      }
+      if (outIdx == endIdx) {
+        return;
+      }
+
+      // Bulk path: each buffered byte yields two values. The source pointer
+      // is kept in a local and written back once after the loop.
+      const uint64_t pairsWanted = (endIdx - outIdx) / 2;
+      const uint64_t pairCount =
+          std::min(pairsWanted, static_cast<uint64_t>(decoder->bufLength()));
+      auto* src = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      for (uint64_t remaining = pairCount; remaining != 0; --remaining) {
+        const uint32_t packed = *src++;
+        data[outIdx++] = (packed >> 4) & 15;
+        data[outIdx++] = packed & 15;
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(src));
+      if (outIdx == endIdx) {
+        return;
+      }
+
+      // Fetch the next byte through readByte(), which will update
+      // 'bufferStart' and 'bufferEnd', and make all 8 of its bits pending.
+      decoder->setCurByte(decoder->readByte());
+      decoder->setBitsLeft(8);
+    }
+  }
+
+  /**
+   * Unpack `len` 8-bit values into data[offset, offset + len).
+   *
+   * Bytes already available in the decoder's buffer are copied directly;
+   * when the buffer runs out, the next byte is fetched through readByte().
+   */
+  void UnpackDefault::unrolledUnpack8(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength();
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        data[curIdx++] = *buffer++;
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // readByte() will update 'bufferStart' and 'bufferEnd'.
+      data[curIdx++] = decoder->readByte();
+    }
+  }
+
+  /**
+   * Unpack `len` 16-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack16(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 2;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint16_t b0, b1;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        b0 = static_cast<uint16_t>(*buffer);
+        b1 = static_cast<uint16_t>(*(buffer + 1));
+        buffer += 2;
+        data[curIdx++] = (b0 << 8) | b1;
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      data[curIdx++] = (b0 << 8) | b1;
+    }
+  }
+
+  /**
+   * Unpack `len` 24-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack24(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 3;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint32_t b0, b1, b2;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        b0 = static_cast<uint32_t>(*buffer);
+        b1 = static_cast<uint32_t>(*(buffer + 1));
+        b2 = static_cast<uint32_t>(*(buffer + 2));
+        buffer += 3;
+        data[curIdx++] = static_cast<int64_t>((b0 << 16) | (b1 << 8) | b2);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      data[curIdx++] = static_cast<int64_t>((b0 << 16) | (b1 << 8) | b2);
+    }
+  }
+
+  /**
+   * Unpack `len` 32-bit big-endian values into data[offset, offset + len).
+   * Each value is assembled as an unsigned 32-bit quantity before being
+   * widened to int64_t.
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack32(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 4;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint32_t b0, b1, b2, b3;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        b0 = static_cast<uint32_t>(*buffer);
+        b1 = static_cast<uint32_t>(*(buffer + 1));
+        b2 = static_cast<uint32_t>(*(buffer + 2));
+        b3 = static_cast<uint32_t>(*(buffer + 3));
+        buffer += 4;
+        data[curIdx++] = static_cast<int64_t>((b0 << 24) | (b1 << 16) | (b2 << 8) | b3);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      b3 = decoder->readByte();
+      data[curIdx++] = static_cast<int64_t>((b0 << 24) | (b1 << 16) | (b2 << 8) | b3);
+    }
+  }
+
+  /**
+   * Unpack `len` 40-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack40(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 5;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint64_t b0, b1, b2, b3, b4;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        // Bytes are widened to 64 bits up front because the shifts below
+        // exceed 31 bits.
+        b0 = static_cast<uint64_t>(*buffer);
+        b1 = static_cast<uint64_t>(*(buffer + 1));
+        b2 = static_cast<uint64_t>(*(buffer + 2));
+        b3 = static_cast<uint64_t>(*(buffer + 3));
+        b4 = static_cast<uint64_t>(*(buffer + 4));
+        buffer += 5;
+        data[curIdx++] =
+            static_cast<int64_t>((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      b3 = decoder->readByte();
+      b4 = decoder->readByte();
+      data[curIdx++] = static_cast<int64_t>((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
+    }
+  }
+
+  /**
+   * Unpack `len` 48-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack48(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 6;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint64_t b0, b1, b2, b3, b4, b5;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        // Bytes are widened to 64 bits up front because the shifts below
+        // exceed 31 bits.
+        b0 = static_cast<uint64_t>(*buffer);
+        b1 = static_cast<uint64_t>(*(buffer + 1));
+        b2 = static_cast<uint64_t>(*(buffer + 2));
+        b3 = static_cast<uint64_t>(*(buffer + 3));
+        b4 = static_cast<uint64_t>(*(buffer + 4));
+        b5 = static_cast<uint64_t>(*(buffer + 5));
+        buffer += 6;
+        data[curIdx++] = static_cast<int64_t>((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) |
+                                              (b4 << 8) | b5);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      b3 = decoder->readByte();
+      b4 = decoder->readByte();
+      b5 = decoder->readByte();
+      data[curIdx++] =
+          static_cast<int64_t>((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) | (b4 << 8) | b5);
+    }
+  }
+
+  /**
+   * Unpack `len` 56-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack56(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 7;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint64_t b0, b1, b2, b3, b4, b5, b6;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        // Bytes are widened to 64 bits up front because the shifts below
+        // exceed 31 bits.
+        b0 = static_cast<uint64_t>(*buffer);
+        b1 = static_cast<uint64_t>(*(buffer + 1));
+        b2 = static_cast<uint64_t>(*(buffer + 2));
+        b3 = static_cast<uint64_t>(*(buffer + 3));
+        b4 = static_cast<uint64_t>(*(buffer + 4));
+        b5 = static_cast<uint64_t>(*(buffer + 5));
+        b6 = static_cast<uint64_t>(*(buffer + 6));
+        buffer += 7;
+        data[curIdx++] = static_cast<int64_t>((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) |
+                                              (b4 << 16) | (b5 << 8) | b6);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      b3 = decoder->readByte();
+      b4 = decoder->readByte();
+      b5 = decoder->readByte();
+      b6 = decoder->readByte();
+      data[curIdx++] = static_cast<int64_t>((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) |
+                                            (b4 << 16) | (b5 << 8) | b6);
+    }
+  }
+
+  /**
+   * Unpack `len` 64-bit big-endian values into data[offset, offset + len).
+   *
+   * Values fully contained in the decoder's buffer are decoded in place; a
+   * value that is not fully buffered is assembled byte by byte through
+   * readByte().
+   */
+  void UnpackDefault::unrolledUnpack64(int64_t* data, uint64_t offset, uint64_t len) {
+    uint64_t curIdx = offset;
+    while (curIdx < offset + len) {
+      // Exhaust the buffer
+      int64_t bufferNum = decoder->bufLength() / 8;
+      bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
+      uint64_t b0, b1, b2, b3, b4, b5, b6, b7;
+      // Avoid updating 'bufferStart' inside the loop.
+      auto* buffer = reinterpret_cast<unsigned char*>(decoder->getBufStart());
+      // The counter has the same width as bufferNum so it cannot overflow
+      // before reaching it.
+      for (int64_t i = 0; i < bufferNum; ++i) {
+        // Bytes are widened to 64 bits up front because the shifts below
+        // exceed 31 bits.
+        b0 = static_cast<uint64_t>(*buffer);
+        b1 = static_cast<uint64_t>(*(buffer + 1));
+        b2 = static_cast<uint64_t>(*(buffer + 2));
+        b3 = static_cast<uint64_t>(*(buffer + 3));
+        b4 = static_cast<uint64_t>(*(buffer + 4));
+        b5 = static_cast<uint64_t>(*(buffer + 5));
+        b6 = static_cast<uint64_t>(*(buffer + 6));
+        b7 = static_cast<uint64_t>(*(buffer + 7));
+        buffer += 8;
+        data[curIdx++] = static_cast<int64_t>((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) |
+                                              (b4 << 24) | (b5 << 16) | (b6 << 8) | b7);
+      }
+      decoder->setBufStart(reinterpret_cast<char*>(buffer));
+      if (curIdx == offset + len) return;
+
+      // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
+      b0 = decoder->readByte();
+      b1 = decoder->readByte();
+      b2 = decoder->readByte();
+      b3 = decoder->readByte();
+      b4 = decoder->readByte();
+      b5 = decoder->readByte();
+      b6 = decoder->readByte();
+      b7 = decoder->readByte();
+      data[curIdx++] = static_cast<int64_t>((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) |
+                                            (b4 << 24) | (b5 << 16) | (b6 << 8) | b7);
+    }
+  }
+
+  /**
+   * Generic unpacker: reads `len` values of `fbs` bits each, most significant
+   * bits first, into data[offset, offset + len). Bits are taken from the
+   * decoder's current byte, which is replaced via readByte() whenever it
+   * holds fewer bits than are still needed.
+   */
+  void UnpackDefault::plainUnpackLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs) {
+    const uint64_t endIdx = offset + len;
+    for (uint64_t outIdx = offset; outIdx < endIdx; ++outIdx) {
+      uint64_t value = 0;
+      uint64_t pending = fbs;
+
+      // Take every bit still pending in the current byte, then move on to
+      // the next byte, for as long as more bits are needed than it holds.
+      while (pending > decoder->getBitsLeft()) {
+        value <<= decoder->getBitsLeft();
+        value |= decoder->getCurByte() & ((1 << decoder->getBitsLeft()) - 1);
+        pending -= decoder->getBitsLeft();
+        decoder->setCurByte(decoder->readByte());
+        decoder->setBitsLeft(8);
+      }
+
+      // The rest of the value comes from the top of the bits still pending
+      // in the current byte.
+      if (pending > 0) {
+        value <<= pending;
+        decoder->setBitsLeft(decoder->getBitsLeft() - static_cast<uint32_t>(pending));
+        value |= (decoder->getCurByte() >> decoder->getBitsLeft()) & ((1 << pending) - 1);
+      }
+
+      data[outIdx] = static_cast<int64_t>(value);
+    }
+  }
+
+  /**
+   * Unpack `len` values of `fbs` bits each into data[offset, offset + len).
+   * Widths of 4 bits and every multiple of 8 up to 64 are routed to a
+   * dedicated unrolled routine; every other width uses plainUnpackLongs().
+   */
+  void BitUnpackDefault::readLongs(RleDecoderV2* decoder, int64_t* data, uint64_t offset,
+                                   uint64_t len, uint64_t fbs) {
+    UnpackDefault unpacker(decoder);
+    switch (fbs) {
+      case 4:
+        return unpacker.unrolledUnpack4(data, offset, len);
+      case 8:
+        return unpacker.unrolledUnpack8(data, offset, len);
+      case 16:
+        return unpacker.unrolledUnpack16(data, offset, len);
+      case 24:
+        return unpacker.unrolledUnpack24(data, offset, len);
+      case 32:
+        return unpacker.unrolledUnpack32(data, offset, len);
+      case 40:
+        return unpacker.unrolledUnpack40(data, offset, len);
+      case 48:
+        return unpacker.unrolledUnpack48(data, offset, len);
+      case 56:
+        return unpacker.unrolledUnpack56(data, offset, len);
+      case 64:
+        return unpacker.unrolledUnpack64(data, offset, len);
+      default:
+        // Deprecated bit sizes fall back to the default implementation.
+        return unpacker.plainUnpackLongs(data, offset, len, fbs);
+    }
+  }
+
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/BpackingDefault.hh b/contrib/libs/apache/orc/c++/src/BpackingDefault.hh
new file mode 100644
index 0000000000..0a58234495
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/BpackingDefault.hh
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_BPACKINGDEFAULT_HH
+#define ORC_BPACKINGDEFAULT_HH
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "Bpacking.hh"
+
+namespace orc {
+  class RleDecoderV2;
+
+  class UnpackDefault {  // unpack routines built on one RleDecoderV2
+   public:
+    UnpackDefault(RleDecoderV2* dec);
+    ~UnpackDefault();
+    // unrolledUnpackN is what BitUnpackDefault::readLongs calls when the bit size is N.
+    void unrolledUnpack4(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack8(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack16(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack24(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack32(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack40(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack48(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack56(int64_t* data, uint64_t offset, uint64_t len);
+    void unrolledUnpack64(int64_t* data, uint64_t offset, uint64_t len);
+    // Called by readLongs for every other bit size; the width is passed as fbs.
+    void plainUnpackLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs);
+
+   private:
+    RleDecoderV2* decoder;  // presumably the constructor's dec - TODO confirm in the .cc
+  };
+
+  class BitUnpackDefault : public BitUnpack {  // dispatches on fbs to UnpackDefault
+   public:
+    static void readLongs(RleDecoderV2* decoder, int64_t* data, uint64_t offset, uint64_t len,
+                          uint64_t fbs);
+  };
+
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/ByteRLE.cc b/contrib/libs/apache/orc/c++/src/ByteRLE.cc
index 1c4a645167..b81d282e35 100644
--- a/contrib/libs/apache/orc/c++/src/ByteRLE.cc
+++ b/contrib/libs/apache/orc/c++/src/ByteRLE.cc
@@ -16,12 +16,13 @@
  * limitations under the License.
  */
 
+#include <string.h>
 #include <algorithm>
 #include <iostream>
-#include <string.h>
 #include <utility>
 
 #include "ByteRLE.hh"
+#include "Utils.hh"
 #include "orc/Exceptions.hh"
 
 namespace orc {
@@ -35,7 +36,7 @@ namespace orc {
   }
 
   class ByteRleEncoderImpl : public ByteRleEncoder {
-  public:
+   public:
     ByteRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output);
     virtual ~ByteRleEncoderImpl() override;
 
@@ -46,8 +47,7 @@ namespace orc {
      * @param notNull If the pointer is null, all values are read. If the
      *    pointer is not null, positions that are false are skipped.
      */
-    virtual void add(const char* data, uint64_t numValues,
-                      const char* notNull) override;
+    virtual void add(const char* data, uint64_t numValues, const char* notNull) override;
 
     /**
      * Get size of buffer used so far.
@@ -68,7 +68,7 @@ namespace orc {
      */
     void reset();
 
-  protected:
+   protected:
     std::unique_ptr<BufferedOutputStream> outputStream;
     char* literals;
     int numLiterals;
@@ -83,22 +83,21 @@ namespace orc {
     void write(char c);
   };
 
-  ByteRleEncoderImpl::ByteRleEncoderImpl(
-                                std::unique_ptr<BufferedOutputStream> output)
-                                  : outputStream(std::move(output)) {
+  ByteRleEncoderImpl::ByteRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output)
+      : outputStream(std::move(output)) {
     literals = new char[MAX_LITERAL_SIZE];
     reset();
   }
 
   ByteRleEncoderImpl::~ByteRleEncoderImpl() {
     // PASS
-    delete [] literals;
+    delete[] literals;
   }
 
   void ByteRleEncoderImpl::writeByte(char c) {
     if (bufferPosition == bufferLength) {
       int addedSize = 0;
-      if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) {
+      if (!outputStream->Next(reinterpret_cast<void**>(&buffer), &addedSize)) {
         throw std::bad_alloc();
       }
       bufferPosition = 0;
@@ -107,10 +106,7 @@ namespace orc {
     buffer[bufferPosition++] = c;
   }
 
-  void ByteRleEncoderImpl::add(
-                               const char* data,
-                               uint64_t numValues,
-                               const char* notNull) {
+  void ByteRleEncoderImpl::add(const char* data, uint64_t numValues, const char* notNull) {
     for (uint64_t i = 0; i < numValues; ++i) {
       if (!notNull || notNull[i]) {
         write(data[i]);
@@ -121,8 +117,7 @@ namespace orc {
   void ByteRleEncoderImpl::writeValues() {
     if (numLiterals != 0) {
       if (repeat) {
-        writeByte(
-            static_cast<char>(numLiterals - static_cast<int>(MINIMUM_REPEAT)));
+        writeByte(static_cast<char>(numLiterals - static_cast<int>(MINIMUM_REPEAT)));
         writeByte(literals[0]);
       } else {
         writeByte(static_cast<char>(-numLiterals));
@@ -189,7 +184,7 @@ namespace orc {
     return outputStream->getSize();
   }
 
-  void ByteRleEncoderImpl::recordPosition(PositionRecorder *recorder) const {
+  void ByteRleEncoderImpl::recordPosition(PositionRecorder* recorder) const {
     uint64_t flushedSize = outputStream->getSize();
     uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition);
     if (outputStream->isCompressed()) {
@@ -220,14 +215,13 @@ namespace orc {
     reset();
   }
 
-  std::unique_ptr<ByteRleEncoder> createByteRleEncoder
-                              (std::unique_ptr<BufferedOutputStream> output) {
-    return std::unique_ptr<ByteRleEncoder>(new ByteRleEncoderImpl
-                                           (std::move(output)));
+  std::unique_ptr<ByteRleEncoder> createByteRleEncoder(
+      std::unique_ptr<BufferedOutputStream> output) {
+    return std::make_unique<ByteRleEncoderImpl>(std::move(output));
   }
 
   class BooleanRleEncoderImpl : public ByteRleEncoderImpl {
-  public:
+   public:
     BooleanRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output);
     virtual ~BooleanRleEncoderImpl() override;
 
@@ -238,8 +232,7 @@ namespace orc {
      * @param notNull If the pointer is null, all values are read. If the
      *    pointer is not null, positions that are false are skipped.
      */
-    virtual void add(const char* data, uint64_t numValues,
-                      const char* notNull) override;
+    virtual void add(const char* data, uint64_t numValues, const char* notNull) override;
 
     /**
      * Flushing underlying BufferedOutputStream
@@ -248,15 +241,15 @@ namespace orc {
 
     virtual void recordPosition(PositionRecorder* recorder) const override;
 
-  private:
+    virtual void suppress() override;
+
+   private:
     int bitsRemained;
     char current;
-
   };
 
-  BooleanRleEncoderImpl::BooleanRleEncoderImpl(
-                        std::unique_ptr<BufferedOutputStream> output)
-                        : ByteRleEncoderImpl(std::move(output)) {
+  BooleanRleEncoderImpl::BooleanRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output)
+      : ByteRleEncoderImpl(std::move(output)) {
     bitsRemained = 8;
     current = static_cast<char>(0);
   }
@@ -265,10 +258,7 @@ namespace orc {
     // PASS
   }
 
-  void BooleanRleEncoderImpl::add(
-                                  const char* data,
-                                  uint64_t numValues,
-                                  const char* notNull) {
+  void BooleanRleEncoderImpl::add(const char* data, uint64_t numValues, const char* notNull) {
     for (uint64_t i = 0; i < numValues; ++i) {
       if (bitsRemained == 0) {
         write(current);
@@ -277,8 +267,7 @@ namespace orc {
       }
       if (!notNull || notNull[i]) {
         if (!data || data[i]) {
-          current =
-            static_cast<char>(current | (0x80 >> (8 - bitsRemained)));
+          current = static_cast<char>(current | (0x80 >> (8 - bitsRemained)));
         }
         --bitsRemained;
       }
@@ -304,43 +293,49 @@ namespace orc {
     recorder->add(static_cast<uint64_t>(8 - bitsRemained));
   }
 
-  std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder
-                                (std::unique_ptr<BufferedOutputStream> output) {
-    BooleanRleEncoderImpl* encoder =
-      new BooleanRleEncoderImpl(std::move(output)) ;
-    return std::unique_ptr<ByteRleEncoder>(
-                                    reinterpret_cast<ByteRleEncoder*>(encoder));
+  void BooleanRleEncoderImpl::suppress() {  // base suppress, then clear bit-packing state
+    ByteRleEncoderImpl::suppress();
+    bitsRemained = 8;                // same value the constructor starts with
+    current = static_cast<char>(0);  // discard any partially packed byte
+  }
+
+  std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder(
+      std::unique_ptr<BufferedOutputStream> output) {
+    // unique_ptr converts derived-to-base implicitly; no raw new or reinterpret_cast.
+    return std::make_unique<BooleanRleEncoderImpl>(std::move(output));
   }
 
   ByteRleDecoder::~ByteRleDecoder() {
     // PASS
   }
 
-  class ByteRleDecoderImpl: public ByteRleDecoder {
-  public:
-    ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+  class ByteRleDecoderImpl : public ByteRleDecoder {
+   public:
+    ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input, ReaderMetrics* metrics);
 
-    virtual ~ByteRleDecoderImpl();
+    ~ByteRleDecoderImpl() override;
 
     /**
      * Seek to a particular spot.
      */
-    virtual void seek(PositionProvider&);
+    virtual void seek(PositionProvider&) override;
 
     /**
      * Seek over a given number of values.
      */
-    virtual void skip(uint64_t numValues);
+    virtual void skip(uint64_t numValues) override;
 
     /**
      * Read a number of values into the batch.
      */
-    virtual void next(char* data, uint64_t numValues, char* notNull);
+    virtual void next(char* data, uint64_t numValues, char* notNull) override;
 
-  protected:
+   protected:
+    void nextInternal(char* data, uint64_t numValues, char* notNull);
     inline void nextBuffer();
     inline signed char readByte();
     inline void readHeader();
+    inline void reset();
 
     std::unique_ptr<SeekableInputStream> inputStream;
     size_t remainingValues;
@@ -348,9 +343,11 @@ namespace orc {
     const char* bufferStart;
     const char* bufferEnd;
     bool repeating;
+    ReaderMetrics* metrics;
   };
 
   void ByteRleDecoderImpl::nextBuffer() {
+    SCOPED_MINUS_STOPWATCH(metrics, ByteDecodingLatencyUs);
     int bufferLength;
     const void* bufferPointer;
     bool result = inputStream->Next(&bufferPointer, &bufferLength);
@@ -365,7 +362,7 @@ namespace orc {
     if (bufferStart == bufferEnd) {
       nextBuffer();
     }
-    return *(bufferStart++);
+    return static_cast<signed char>(*(bufferStart++));
   }
 
   void ByteRleDecoderImpl::readHeader() {
@@ -376,13 +373,11 @@ namespace orc {
     } else {
       remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT;
       repeating = true;
-      value = readByte();
+      value = static_cast<char>(readByte());
     }
   }
 
-  ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream>
-                                         input) {
-    inputStream = std::move(input);
+  void ByteRleDecoderImpl::reset() {
     repeating = false;
     remainingValues = 0;
     value = 0;
@@ -390,6 +385,13 @@ namespace orc {
     bufferEnd = nullptr;
   }
 
+  // Owns the stream, stores the metrics pointer, then clears decode state via reset().
+  ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input,
+                                         ReaderMetrics* _metrics)
+      : inputStream(std::move(input)), metrics(_metrics) {
+    reset();
+  }
+
   ByteRleDecoderImpl::~ByteRleDecoderImpl() {
     // PASS
   }
@@ -397,15 +399,14 @@ namespace orc {
   void ByteRleDecoderImpl::seek(PositionProvider& location) {
     // move the input stream
     inputStream->seek(location);
-    // force a re-read from the stream
-    bufferEnd = bufferStart;
-    // read a new header
-    readHeader();
+    // clear the decoder state; skip()/next() read the header lazily when needed
+    reset();
     // skip ahead the given number of records
     ByteRleDecoderImpl::skip(location.next());
   }
 
   void ByteRleDecoderImpl::skip(uint64_t numValues) {
+    SCOPED_STOPWATCH(metrics, ByteDecodingLatencyUs, ByteDecodingCall);
     while (numValues > 0) {
       if (remainingValues == 0) {
         readHeader();
@@ -422,8 +423,7 @@ namespace orc {
             nextBuffer();
           }
           size_t skipSize = std::min(static_cast<size_t>(consumedBytes),
-                                     static_cast<size_t>(bufferEnd -
-                                                         bufferStart));
+                                     static_cast<size_t>(bufferEnd - bufferStart));
           bufferStart += skipSize;
           consumedBytes -= skipSize;
         }
@@ -431,8 +431,12 @@ namespace orc {
     }
   }
 
-  void ByteRleDecoderImpl::next(char* data, uint64_t numValues,
-                                char* notNull) {
+  void ByteRleDecoderImpl::next(char* data, uint64_t numValues, char* notNull) {
+    SCOPED_STOPWATCH(metrics, ByteDecodingLatencyUs, ByteDecodingCall);
+    nextInternal(data, numValues, notNull);  // shared with BooleanRleDecoderImpl::next
+  }
+
+  void ByteRleDecoderImpl::nextInternal(char* data, uint64_t numValues, char* notNull) {
     uint64_t position = 0;
     // skip over null values
     while (notNull && position < numValues && !notNull[position]) {
@@ -444,12 +448,11 @@ namespace orc {
         readHeader();
       }
       // how many do we read out of this block?
-      size_t count = std::min(static_cast<size_t>(numValues - position),
-                              remainingValues);
+      size_t count = std::min(static_cast<size_t>(numValues - position), remainingValues);
       uint64_t consumed = 0;
       if (repeating) {
         if (notNull) {
-          for(uint64_t i=0; i < count; ++i) {
+          for (uint64_t i = 0; i < count; ++i) {
             if (notNull[position + i]) {
               data[position + i] = value;
               consumed += 1;
@@ -461,9 +464,9 @@ namespace orc {
         }
       } else {
         if (notNull) {
-          for(uint64_t i=0; i < count; ++i) {
+          for (uint64_t i = 0; i < count; ++i) {
             if (notNull[position + i]) {
-              data[position + i] = readByte();
+              data[position + i] = static_cast<char>(readByte());
               consumed += 1;
             }
           }
@@ -473,9 +476,8 @@ namespace orc {
             if (bufferStart == bufferEnd) {
               nextBuffer();
             }
-            uint64_t copyBytes =
-              std::min(static_cast<uint64_t>(count - i),
-                       static_cast<uint64_t>(bufferEnd - bufferStart));
+            uint64_t copyBytes = std::min(static_cast<uint64_t>(count - i),
+                                          static_cast<uint64_t>(bufferEnd - bufferStart));
             memcpy(data + position + i, bufferStart, copyBytes);
             bufferStart += copyBytes;
             i += copyBytes;
@@ -492,41 +494,40 @@ namespace orc {
     }
   }
 
-  std::unique_ptr<ByteRleDecoder> createByteRleDecoder
-                                 (std::unique_ptr<SeekableInputStream> input) {
-    return std::unique_ptr<ByteRleDecoder>(new ByteRleDecoderImpl
-                                           (std::move(input)));
+  std::unique_ptr<ByteRleDecoder> createByteRleDecoder(std::unique_ptr<SeekableInputStream> input,
+                                                       ReaderMetrics* metrics) {
+    return std::make_unique<ByteRleDecoderImpl>(std::move(input), metrics);
   }
 
-  class BooleanRleDecoderImpl: public ByteRleDecoderImpl {
-  public:
-    BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+  class BooleanRleDecoderImpl : public ByteRleDecoderImpl {
+   public:
+    BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input, ReaderMetrics* metrics);
 
-    virtual ~BooleanRleDecoderImpl();
+    ~BooleanRleDecoderImpl() override;
 
     /**
      * Seek to a particular spot.
      */
-    virtual void seek(PositionProvider&);
+    virtual void seek(PositionProvider&) override;
 
     /**
      * Seek over a given number of values.
      */
-    virtual void skip(uint64_t numValues);
+    virtual void skip(uint64_t numValues) override;
 
     /**
      * Read a number of values into the batch.
      */
-    virtual void next(char* data, uint64_t numValues, char* notNull);
+    virtual void next(char* data, uint64_t numValues, char* notNull) override;
 
-  protected:
+   protected:
     size_t remainingBits;
     char lastByte;
   };
 
-  BooleanRleDecoderImpl::BooleanRleDecoderImpl
-                                (std::unique_ptr<SeekableInputStream> input
-                                 ): ByteRleDecoderImpl(std::move(input)) {
+  BooleanRleDecoderImpl::BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input,
+                                               ReaderMetrics* _metrics)
+      : ByteRleDecoderImpl(std::move(input), _metrics) {
     remainingBits = 0;
     lastByte = 0;
   }
@@ -564,35 +565,33 @@ namespace orc {
     }
   }
 
-  void BooleanRleDecoderImpl::next(char* data, uint64_t numValues,
-                                   char* notNull) {
+  void BooleanRleDecoderImpl::next(char* data, uint64_t numValues, char* notNull) {
+    SCOPED_STOPWATCH(metrics, ByteDecodingLatencyUs, ByteDecodingCall);
     // next spot to fill in
     uint64_t position = 0;
 
     // use up any remaining bits
     if (notNull) {
-      while(remainingBits > 0 && position < numValues) {
+      while (remainingBits > 0 && position < numValues) {
         if (notNull[position]) {
           remainingBits -= 1;
-          data[position] = (static_cast<unsigned char>(lastByte) >>
-                            remainingBits) & 0x1;
+          data[position] = (static_cast<unsigned char>(lastByte) >> remainingBits) & 0x1;
         } else {
           data[position] = 0;
         }
         position += 1;
       }
     } else {
-      while(remainingBits > 0 && position < numValues) {
+      while (remainingBits > 0 && position < numValues) {
         remainingBits -= 1;
-        data[position++] = (static_cast<unsigned char>(lastByte) >>
-                            remainingBits) & 0x1;
+        data[position++] = (static_cast<unsigned char>(lastByte) >> remainingBits) & 0x1;
       }
     }
 
     // count the number of nonNulls remaining
     uint64_t nonNulls = numValues - position;
     if (notNull) {
-      for(uint64_t i=position; i < numValues; ++i) {
+      for (uint64_t i = position; i < numValues; ++i) {
         if (!notNull[i]) {
           nonNulls -= 1;
         }
@@ -607,14 +606,14 @@ namespace orc {
     } else if (position < numValues) {
       // read the new bytes into the array
       uint64_t bytesRead = (nonNulls + 7) / 8;
-      ByteRleDecoderImpl::next(data + position, bytesRead, nullptr);
+      ByteRleDecoderImpl::nextInternal(data + position, bytesRead, nullptr);
       lastByte = data[position + bytesRead - 1];
       remainingBits = bytesRead * 8 - nonNulls;
       // expand the array backwards so that we don't clobber the data
       uint64_t bitsLeft = bytesRead * 8 - remainingBits;
       if (notNull) {
-        for(int64_t i=static_cast<int64_t>(numValues) - 1;
-            i >= static_cast<int64_t>(position); --i) {
+        for (int64_t i = static_cast<int64_t>(numValues) - 1; i >= static_cast<int64_t>(position);
+             --i) {
           if (notNull[i]) {
             uint64_t shiftPosn = (-bitsLeft) % 8;
             data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
@@ -624,8 +623,8 @@ namespace orc {
           }
         }
       } else {
-        for(int64_t i=static_cast<int64_t>(numValues) - 1;
-            i >= static_cast<int64_t>(position); --i, --bitsLeft) {
+        for (int64_t i = static_cast<int64_t>(numValues) - 1; i >= static_cast<int64_t>(position);
+             --i, --bitsLeft) {
           uint64_t shiftPosn = (-bitsLeft) % 8;
           data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
         }
@@ -633,11 +632,8 @@ namespace orc {
     }
   }
 
-  std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
-                                 (std::unique_ptr<SeekableInputStream> input) {
-    BooleanRleDecoderImpl* decoder =
-      new BooleanRleDecoderImpl(std::move(input));
-    return std::unique_ptr<ByteRleDecoder>(
-                                    reinterpret_cast<ByteRleDecoder*>(decoder));
+  std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder(
+      std::unique_ptr<SeekableInputStream> input, ReaderMetrics* metrics) {
+    return std::make_unique<BooleanRleDecoderImpl>(std::move(input), metrics);
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/ByteRLE.hh b/contrib/libs/apache/orc/c++/src/ByteRLE.hh
index 2f6e2eb4df..bd19f52ecc 100644
--- a/contrib/libs/apache/orc/c++/src/ByteRLE.hh
+++ b/contrib/libs/apache/orc/c++/src/ByteRLE.hh
@@ -27,7 +27,7 @@
 namespace orc {
 
   class ByteRleEncoder {
-  public:
+   public:
     virtual ~ByteRleEncoder();
 
     /**
@@ -37,8 +37,7 @@ namespace orc {
      * @param notNull If the pointer is null, all values are read. If the
      *    pointer is not null, positions that are false are skipped.
      */
-    virtual void add(const char* data, uint64_t numValues,
-                      const char* notNull) = 0;
+    virtual void add(const char* data, uint64_t numValues, const char* notNull) = 0;
 
     /**
      * Get size of buffer used so far.
@@ -63,7 +62,7 @@ namespace orc {
   };
 
   class ByteRleDecoder {
-  public:
+   public:
     virtual ~ByteRleDecoder();
 
     /**
@@ -90,22 +89,23 @@ namespace orc {
    * Create a byte RLE encoder.
    * @param output the output stream to write to
    */
-  std::unique_ptr<ByteRleEncoder> createByteRleEncoder
-                                 (std::unique_ptr<BufferedOutputStream> output);
+  std::unique_ptr<ByteRleEncoder> createByteRleEncoder(
+      std::unique_ptr<BufferedOutputStream> output);
 
   /**
    * Create a boolean RLE encoder.
    * @param output the output stream to write to
    */
-  std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder
-                                 (std::unique_ptr<BufferedOutputStream> output);
+  std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder(
+      std::unique_ptr<BufferedOutputStream> output);
 
   /**
    * Create a byte RLE decoder.
    * @param input the input stream to read from
+   * @param metrics metrics object handed to the decoder's stopwatch instrumentation
    */
-  std::unique_ptr<ByteRleDecoder> createByteRleDecoder
-                                 (std::unique_ptr<SeekableInputStream> input);
+  std::unique_ptr<ByteRleDecoder> createByteRleDecoder(std::unique_ptr<SeekableInputStream> input,
+                                                       ReaderMetrics* metrics);
 
   /**
    * Create a boolean RLE decoder.
@@ -114,9 +114,10 @@ namespace orc {
    * if the value is masked by notNull. This is required for the notNull stream
    * processing to properly apply multiple masks from nested types.
    * @param input the input stream to read from
+   * @param metrics the metrics of the decoder
    */
-  std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
-                                 (std::unique_ptr<SeekableInputStream> input);
-}
+  std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder(
+      std::unique_ptr<SeekableInputStream> input, ReaderMetrics* metrics);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc b/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc
index ab6b690c57..5297f80371 100644
--- a/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc
+++ b/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc
@@ -21,167 +21,174 @@
 
 #include "Adaptor.hh"
 
+#include <time.h>
 #include <limits>
 #include <sstream>
 #include <stdexcept>
-#include <time.h>
 #include <typeinfo>
 
 #ifdef __clang__
-  #pragma clang diagnostic ignored "-Wformat-security"
+#pragma clang diagnostic ignored "-Wformat-security"
 #endif
 
 namespace orc {
 
-  class VoidColumnPrinter: public ColumnPrinter {
-  public:
+  class VoidColumnPrinter : public ColumnPrinter {
+   public:
     VoidColumnPrinter(std::string&);
     ~VoidColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class BooleanColumnPrinter: public ColumnPrinter {
-  private:
+  class BooleanColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* data;
-  public:
+
+   public:
     BooleanColumnPrinter(std::string&);
     ~BooleanColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class LongColumnPrinter: public ColumnPrinter {
-  private:
+  class LongColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* data;
-  public:
+
+   public:
     LongColumnPrinter(std::string&);
     ~LongColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class DoubleColumnPrinter: public ColumnPrinter {
-  private:
+  class DoubleColumnPrinter : public ColumnPrinter {
+   private:
     const double* data;
     const bool isFloat;
 
-  public:
+   public:
     DoubleColumnPrinter(std::string&, const Type& type);
     virtual ~DoubleColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class TimestampColumnPrinter: public ColumnPrinter {
-  private:
+  class TimestampColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* seconds;
     const int64_t* nanoseconds;
 
-  public:
+   public:
     TimestampColumnPrinter(std::string&);
     ~TimestampColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class DateColumnPrinter: public ColumnPrinter {
-  private:
+  class DateColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* data;
 
-  public:
+   public:
     DateColumnPrinter(std::string&);
     ~DateColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class Decimal64ColumnPrinter: public ColumnPrinter {
-  private:
+  class Decimal64ColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* data;
     int32_t scale;
-  public:
+
+   public:
     Decimal64ColumnPrinter(std::string&);
     ~Decimal64ColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class Decimal128ColumnPrinter: public ColumnPrinter {
-  private:
+  class Decimal128ColumnPrinter : public ColumnPrinter {
+   private:
     const Int128* data;
     int32_t scale;
-  public:
+
+   public:
     Decimal128ColumnPrinter(std::string&);
     ~Decimal128ColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class StringColumnPrinter: public ColumnPrinter {
-  private:
-    const char* const * start;
+  class StringColumnPrinter : public ColumnPrinter {
+   private:
+    const char* const* start;
     const int64_t* length;
-  public:
+
+   public:
     StringColumnPrinter(std::string&);
     virtual ~StringColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class BinaryColumnPrinter: public ColumnPrinter {
-  private:
-    const char* const * start;
+  class BinaryColumnPrinter : public ColumnPrinter {
+   private:
+    const char* const* start;
     const int64_t* length;
-  public:
+
+   public:
     BinaryColumnPrinter(std::string&);
     virtual ~BinaryColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class ListColumnPrinter: public ColumnPrinter {
-  private:
+  class ListColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* offsets;
     std::unique_ptr<ColumnPrinter> elementPrinter;
 
-  public:
+   public:
     ListColumnPrinter(std::string&, const Type& type);
     virtual ~ListColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class MapColumnPrinter: public ColumnPrinter {
-  private:
+  class MapColumnPrinter : public ColumnPrinter {
+   private:
     const int64_t* offsets;
     std::unique_ptr<ColumnPrinter> keyPrinter;
     std::unique_ptr<ColumnPrinter> elementPrinter;
 
-  public:
+   public:
     MapColumnPrinter(std::string&, const Type& type);
     virtual ~MapColumnPrinter() override {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class UnionColumnPrinter: public ColumnPrinter {
-  private:
-    const unsigned char *tags;
+  class UnionColumnPrinter : public ColumnPrinter {
+   private:
+    const unsigned char* tags;
     const uint64_t* offsets;
     std::vector<std::unique_ptr<ColumnPrinter>> fieldPrinter;
 
-  public:
+   public:
     UnionColumnPrinter(std::string&, const Type& type);
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
   };
 
-  class StructColumnPrinter: public ColumnPrinter {
-  private:
+  class StructColumnPrinter : public ColumnPrinter {
+   private:
     std::vector<std::unique_ptr<ColumnPrinter>> fieldPrinter;
     std::vector<std::string> fieldNames;
-  public:
+
+   public:
     StructColumnPrinter(std::string&, const Type& type);
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -191,13 +198,12 @@ namespace orc {
     file += ch;
   }
 
-  void writeString(std::string& file, const char *ptr) {
+  void writeString(std::string& file, const char* ptr) {
     size_t len = strlen(ptr);
     file.append(ptr, len);
   }
 
-  ColumnPrinter::ColumnPrinter(std::string& _buffer
-                               ): buffer(_buffer) {
+  ColumnPrinter::ColumnPrinter(std::string& _buffer) : buffer(_buffer) {
     notNull = nullptr;
     hasNulls = false;
   }
@@ -211,89 +217,87 @@ namespace orc {
     if (hasNulls) {
       notNull = batch.notNull.data();
     } else {
-      notNull = nullptr ;
+      notNull = nullptr;
     }
   }
 
-  std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer,
-                                                     const Type* type) {
-    ColumnPrinter *result = nullptr;
+  std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer, const Type* type) {
+    std::unique_ptr<ColumnPrinter> result;
     if (type == nullptr) {
-      result = new VoidColumnPrinter(buffer);
+      result = std::make_unique<VoidColumnPrinter>(buffer);
     } else {
-      switch(static_cast<int64_t>(type->getKind())) {
-      case BOOLEAN:
-        result = new BooleanColumnPrinter(buffer);
-        break;
-
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-        result = new LongColumnPrinter(buffer);
-        break;
-
-      case FLOAT:
-      case DOUBLE:
-        result = new DoubleColumnPrinter(buffer, *type);
-        break;
-
-      case STRING:
-      case VARCHAR :
-      case CHAR:
-        result = new StringColumnPrinter(buffer);
-        break;
-
-      case BINARY:
-        result = new BinaryColumnPrinter(buffer);
-        break;
-
-      case TIMESTAMP:
-      case TIMESTAMP_INSTANT:
-        result = new TimestampColumnPrinter(buffer);
-        break;
-
-      case LIST:
-        result = new ListColumnPrinter(buffer, *type);
-        break;
-
-      case MAP:
-        result = new MapColumnPrinter(buffer, *type);
-        break;
-
-      case STRUCT:
-        result = new StructColumnPrinter(buffer, *type);
-        break;
-
-      case DECIMAL:
-        if (type->getPrecision() == 0 || type->getPrecision() > 18) {
-          result = new Decimal128ColumnPrinter(buffer);
-        } else {
-          result = new Decimal64ColumnPrinter(buffer);
-        }
-        break;
+      switch (static_cast<int64_t>(type->getKind())) {
+        case BOOLEAN:
+          result = std::make_unique<BooleanColumnPrinter>(buffer);
+          break;
+
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+          result = std::make_unique<LongColumnPrinter>(buffer);
+          break;
+
+        case FLOAT:
+        case DOUBLE:
+          result = std::make_unique<DoubleColumnPrinter>(buffer, *type);
+          break;
 
-      case DATE:
-        result = new DateColumnPrinter(buffer);
-        break;
+        case STRING:
+        case VARCHAR:
+        case CHAR:
+          result = std::make_unique<StringColumnPrinter>(buffer);
+          break;
 
-      case UNION:
-        result = new UnionColumnPrinter(buffer, *type);
-        break;
+        case BINARY:
+          result = std::make_unique<BinaryColumnPrinter>(buffer);
+          break;
+
+        case TIMESTAMP:
+        case TIMESTAMP_INSTANT:
+          result = std::make_unique<TimestampColumnPrinter>(buffer);
+          break;
+
+        case LIST:
+          result = std::make_unique<ListColumnPrinter>(buffer, *type);
+          break;
+
+        case MAP:
+          result = std::make_unique<MapColumnPrinter>(buffer, *type);
+          break;
+
+        case STRUCT:
+          result = std::make_unique<StructColumnPrinter>(buffer, *type);
+          break;
+
+        case DECIMAL:
+          if (type->getPrecision() == 0 || type->getPrecision() > 18) {
+            result = std::make_unique<Decimal128ColumnPrinter>(buffer);
+          } else {
+            result = std::make_unique<Decimal64ColumnPrinter>(buffer);
+          }
+          break;
+
+        case DATE:
+          result = std::make_unique<DateColumnPrinter>(buffer);
+          break;
+
+        case UNION:
+          result = std::make_unique<UnionColumnPrinter>(buffer, *type);
+          break;
 
-      default:
-        throw std::logic_error("unknown batch type");
+        default:
+          throw std::logic_error("unknown batch type");
       }
     }
-    return std::unique_ptr<ColumnPrinter>(result);
+    return result;
   }
 
-  VoidColumnPrinter::VoidColumnPrinter(std::string& _buffer
-                                       ): ColumnPrinter(_buffer) {
+  VoidColumnPrinter::VoidColumnPrinter(std::string& _buffer) : ColumnPrinter(_buffer) {
     // PASS
   }
 
-  void VoidColumnPrinter::reset(const  ColumnVectorBatch&) {
+  void VoidColumnPrinter::reset(const ColumnVectorBatch&) {
     // PASS
   }
 
@@ -301,13 +305,12 @@ namespace orc {
     writeString(buffer, "null");
   }
 
-  LongColumnPrinter::LongColumnPrinter(std::string& _buffer
-                                       ): ColumnPrinter(_buffer),
-                                          data(nullptr) {
+  LongColumnPrinter::LongColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), data(nullptr) {
     // PASS
   }
 
-  void LongColumnPrinter::reset(const  ColumnVectorBatch& batch) {
+  void LongColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
     data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
   }
@@ -316,22 +319,17 @@ namespace orc {
     if (hasNulls && !notNull[rowId]) {
       writeString(buffer, "null");
     } else {
-      char numBuffer[64];
-      snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d",
-               static_cast<int64_t >(data[rowId]));
-      writeString(buffer, numBuffer);
+      const auto numBuffer = std::to_string(static_cast<int64_t>(data[rowId]));
+      writeString(buffer, numBuffer.c_str());
     }
   }
 
-  DoubleColumnPrinter::DoubleColumnPrinter(std::string& _buffer,
-                                           const Type& type
-                                           ): ColumnPrinter(_buffer),
-                                              data(nullptr),
-                                              isFloat(type.getKind() == FLOAT){
+  DoubleColumnPrinter::DoubleColumnPrinter(std::string& _buffer, const Type& type)
+      : ColumnPrinter(_buffer), data(nullptr), isFloat(type.getKind() == FLOAT) {
     // PASS
   }
 
-  void DoubleColumnPrinter::reset(const  ColumnVectorBatch& batch) {
+  void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
     data = dynamic_cast<const DoubleVectorBatch&>(batch).data.data();
   }
@@ -341,20 +339,17 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       char numBuffer[64];
-      snprintf(numBuffer, sizeof(numBuffer), isFloat ? "%.7g" : "%.14g",
-               data[rowId]);
+      snprintf(numBuffer, sizeof(numBuffer), isFloat ? "%.7g" : "%.14g", data[rowId]);
       writeString(buffer, numBuffer);
     }
   }
 
-  Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& _buffer
-                                                 ): ColumnPrinter(_buffer),
-                                                    data(nullptr),
-                                                    scale(0) {
+  Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), data(nullptr), scale(0) {
     // PASS
   }
 
-  void Decimal64ColumnPrinter::reset(const  ColumnVectorBatch& batch) {
+  void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
     data = dynamic_cast<const Decimal64VectorBatch&>(batch).values.data();
     scale = dynamic_cast<const Decimal64VectorBatch&>(batch).scale;
@@ -376,13 +371,12 @@ namespace orc {
     int32_t len = static_cast<int32_t>(str.length());
     if (len > scale) {
       return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." +
-        str.substr(static_cast<size_t>(len - scale),
-                   static_cast<size_t>(scale));
+             str.substr(static_cast<size_t>(len - scale), static_cast<size_t>(scale));
     } else if (len == scale) {
       return sign + "0." + str;
     } else {
       std::string result = sign + "0.";
-      for(int32_t i=0; i < scale - len; ++i) {
+      for (int32_t i = 0; i < scale - len; ++i) {
         result += "0";
       }
       return result + str;
@@ -397,31 +391,27 @@ namespace orc {
     }
   }
 
-  Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& _buffer
-                                                   ): ColumnPrinter(_buffer),
-                                                      data(nullptr),
-                                                      scale(0) {
-     // PASS
-   }
-
-   void Decimal128ColumnPrinter::reset(const  ColumnVectorBatch& batch) {
-     ColumnPrinter::reset(batch);
-     data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
-     scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
-   }
-
-   void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
-     if (hasNulls && !notNull[rowId]) {
-       writeString(buffer, "null");
-     } else {
-       writeString(buffer, data[rowId].toDecimalString(scale).c_str());
-     }
-   }
-
-  StringColumnPrinter::StringColumnPrinter(std::string& _buffer
-                                           ): ColumnPrinter(_buffer),
-                                              start(nullptr),
-                                              length(nullptr) {
+  Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), data(nullptr), scale(0) {
+    // PASS
+  }
+
+  void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+    ColumnPrinter::reset(batch);
+    data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
+    scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
+  }
+
+  void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
+    if (hasNulls && !notNull[rowId]) {
+      writeString(buffer, "null");
+    } else {
+      writeString(buffer, data[rowId].toDecimalString(scale).c_str());
+    }
+  }
+
+  StringColumnPrinter::StringColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), start(nullptr), length(nullptr) {
     // PASS
   }
 
@@ -436,51 +426,48 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeChar(buffer, '"');
-      for(int64_t i=0; i < length[rowId]; ++i) {
+      for (int64_t i = 0; i < length[rowId]; ++i) {
         char ch = static_cast<char>(start[rowId][i]);
         switch (ch) {
-        case '\\':
-          writeString(buffer, "\\\\");
-          break;
-        case '\b':
-          writeString(buffer, "\\b");
-          break;
-        case '\f':
-          writeString(buffer, "\\f");
-          break;
-        case '\n':
-          writeString(buffer, "\\n");
-          break;
-        case '\r':
-          writeString(buffer, "\\r");
-          break;
-        case '\t':
-          writeString(buffer, "\\t");
-          break;
-        case '"':
-          writeString(buffer, "\\\"");
-          break;
-        default:
-          writeChar(buffer, ch);
-          break;
+          case '\\':
+            writeString(buffer, "\\\\");
+            break;
+          case '\b':
+            writeString(buffer, "\\b");
+            break;
+          case '\f':
+            writeString(buffer, "\\f");
+            break;
+          case '\n':
+            writeString(buffer, "\\n");
+            break;
+          case '\r':
+            writeString(buffer, "\\r");
+            break;
+          case '\t':
+            writeString(buffer, "\\t");
+            break;
+          case '"':
+            writeString(buffer, "\\\"");
+            break;
+          default:
+            writeChar(buffer, ch);
+            break;
         }
       }
       writeChar(buffer, '"');
     }
   }
 
-  ListColumnPrinter::ListColumnPrinter(std::string& _buffer,
-                                       const Type& type
-                                       ): ColumnPrinter(_buffer),
-                                          offsets(nullptr) {
+  ListColumnPrinter::ListColumnPrinter(std::string& _buffer, const Type& type)
+      : ColumnPrinter(_buffer), offsets(nullptr) {
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
   }
 
-  void ListColumnPrinter::reset(const  ColumnVectorBatch& batch) {
+  void ListColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
     offsets = dynamic_cast<const ListVectorBatch&>(batch).offsets.data();
-    elementPrinter->reset(*dynamic_cast<const ListVectorBatch&>(batch).
-                          elements);
+    elementPrinter->reset(*dynamic_cast<const ListVectorBatch&>(batch).elements);
   }
 
   void ListColumnPrinter::printRow(uint64_t rowId) {
@@ -488,7 +475,7 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeChar(buffer, '[');
-      for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) {
+      for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) {
         if (i != offsets[rowId]) {
           writeString(buffer, ", ");
         }
@@ -498,15 +485,13 @@ namespace orc {
     }
   }
 
-  MapColumnPrinter::MapColumnPrinter(std::string& _buffer,
-                                     const Type& type
-                                     ): ColumnPrinter(_buffer),
-                                        offsets(nullptr) {
+  MapColumnPrinter::MapColumnPrinter(std::string& _buffer, const Type& type)
+      : ColumnPrinter(_buffer), offsets(nullptr) {
     keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
   }
 
-  void MapColumnPrinter::reset(const  ColumnVectorBatch& batch) {
+  void MapColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
     const MapVectorBatch& myBatch = dynamic_cast<const MapVectorBatch&>(batch);
     offsets = myBatch.offsets.data();
@@ -519,7 +504,7 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeChar(buffer, '[');
-      for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) {
+      for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) {
         if (i != offsets[rowId]) {
           writeString(buffer, ", ");
         }
@@ -533,23 +518,19 @@ namespace orc {
     }
   }
 
-  UnionColumnPrinter::UnionColumnPrinter(std::string& _buffer,
-                                           const Type& type
-                                         ): ColumnPrinter(_buffer),
-                                            tags(nullptr),
-                                            offsets(nullptr) {
-    for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
+  UnionColumnPrinter::UnionColumnPrinter(std::string& _buffer, const Type& type)
+      : ColumnPrinter(_buffer), tags(nullptr), offsets(nullptr) {
+    for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
       fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i)));
     }
   }
 
   void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
-    const UnionVectorBatch& unionBatch =
-      dynamic_cast<const UnionVectorBatch&>(batch);
+    const UnionVectorBatch& unionBatch = dynamic_cast<const UnionVectorBatch&>(batch);
     tags = unionBatch.tags.data();
     offsets = unionBatch.offsets.data();
-    for(size_t i=0; i < fieldPrinter.size(); ++i) {
+    for (size_t i = 0; i < fieldPrinter.size(); ++i) {
       fieldPrinter[i]->reset(*(unionBatch.children[i]));
     }
   }
@@ -559,20 +540,17 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeString(buffer, "{\"tag\": ");
-      char numBuffer[64];
-      snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d",
-               static_cast<int64_t>(tags[rowId]));
-      writeString(buffer, numBuffer);
+      const auto numBuffer = std::to_string(static_cast<int64_t>(tags[rowId]));
+      writeString(buffer, numBuffer.c_str());
       writeString(buffer, ", \"value\": ");
       fieldPrinter[tags[rowId]]->printRow(offsets[rowId]);
       writeChar(buffer, '}');
     }
   }
 
-  StructColumnPrinter::StructColumnPrinter(std::string& _buffer,
-                                           const Type& type
-                                           ): ColumnPrinter(_buffer) {
-    for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
+  StructColumnPrinter::StructColumnPrinter(std::string& _buffer, const Type& type)
+      : ColumnPrinter(_buffer) {
+    for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
       fieldNames.push_back(type.getFieldName(i));
       fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i)));
     }
@@ -580,9 +558,8 @@ namespace orc {
 
   void StructColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
-    const StructVectorBatch& structBatch =
-      dynamic_cast<const StructVectorBatch&>(batch);
-    for(size_t i=0; i < fieldPrinter.size(); ++i) {
+    const StructVectorBatch& structBatch = dynamic_cast<const StructVectorBatch&>(batch);
+    for (size_t i = 0; i < fieldPrinter.size(); ++i) {
       fieldPrinter[i]->reset(*(structBatch.fields[i]));
     }
   }
@@ -592,7 +569,7 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeChar(buffer, '{');
-      for(unsigned int i=0; i < fieldPrinter.size(); ++i) {
+      for (unsigned int i = 0; i < fieldPrinter.size(); ++i) {
         if (i != 0) {
           writeString(buffer, ", ");
         }
@@ -605,9 +582,8 @@ namespace orc {
     }
   }
 
-  DateColumnPrinter::DateColumnPrinter(std::string& _buffer
-                                       ): ColumnPrinter(_buffer),
-                                          data(nullptr) {
+  DateColumnPrinter::DateColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), data(nullptr) {
     // PASS
   }
 
@@ -631,9 +607,8 @@ namespace orc {
     data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
   }
 
-  BooleanColumnPrinter::BooleanColumnPrinter(std::string& _buffer
-                                             ): ColumnPrinter(_buffer),
-                                                data(nullptr) {
+  BooleanColumnPrinter::BooleanColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), data(nullptr) {
     // PASS
   }
 
@@ -650,10 +625,8 @@ namespace orc {
     data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
   }
 
-  BinaryColumnPrinter::BinaryColumnPrinter(std::string& _buffer
-                                           ): ColumnPrinter(_buffer),
-                                              start(nullptr),
-                                              length(nullptr) {
+  BinaryColumnPrinter::BinaryColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), start(nullptr), length(nullptr) {
     // PASS
   }
 
@@ -662,14 +635,12 @@ namespace orc {
       writeString(buffer, "null");
     } else {
       writeChar(buffer, '[');
-      for(int64_t i=0; i < length[rowId]; ++i) {
+      for (int64_t i = 0; i < length[rowId]; ++i) {
         if (i != 0) {
           writeString(buffer, ", ");
         }
-        char numBuffer[64];
-        snprintf(numBuffer, sizeof(numBuffer), "%d",
-                 (static_cast<const int>(start[rowId][i]) & 0xff));
-        writeString(buffer, numBuffer);
+        const auto numBuffer = std::to_string(static_cast<int>(start[rowId][i]) & 0xff);
+        writeString(buffer, numBuffer.c_str());
       }
       writeChar(buffer, ']');
     }
@@ -681,10 +652,8 @@ namespace orc {
     length = dynamic_cast<const StringVectorBatch&>(batch).length.data();
   }
 
-  TimestampColumnPrinter::TimestampColumnPrinter(std::string& _buffer
-                                                 ): ColumnPrinter(_buffer),
-                                                    seconds(nullptr),
-                                                    nanoseconds(nullptr) {
+  TimestampColumnPrinter::TimestampColumnPrinter(std::string& _buffer)
+      : ColumnPrinter(_buffer), seconds(nullptr), nanoseconds(nullptr) {
     // PASS
   }
 
@@ -712,20 +681,20 @@ namespace orc {
           zeroDigits += 1;
         }
       }
-      char numBuffer[64];
-      snprintf(numBuffer, sizeof(numBuffer),
-               "%0*" INT64_FORMAT_STRING "d\"",
-               static_cast<int>(NANO_DIGITS - zeroDigits),
-               static_cast<int64_t >(nanos));
-      writeString(buffer, numBuffer);
+      const auto numBuffer = std::to_string(static_cast<int64_t>(nanos));
+      const int64_t padDigits = NANO_DIGITS - zeroDigits - static_cast<int64_t>(numBuffer.size());
+      for (int i = 0; i < padDigits; ++i) {
+        writeChar(buffer, '0');
+      }
+      writeString(buffer, numBuffer.c_str());
+      writeChar(buffer, '"');
     }
   }
 
   void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) {
     ColumnPrinter::reset(batch);
-    const TimestampVectorBatch& ts =
-      dynamic_cast<const TimestampVectorBatch&>(batch);
+    const TimestampVectorBatch& ts = dynamic_cast<const TimestampVectorBatch&>(batch);
     seconds = ts.data.data();
     nanoseconds = ts.nanoseconds.data();
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/ColumnReader.cc b/contrib/libs/apache/orc/c++/src/ColumnReader.cc
index 873b54c618..a6bbdabedc 100644
--- a/contrib/libs/apache/orc/c++/src/ColumnReader.cc
+++ b/contrib/libs/apache/orc/c++/src/ColumnReader.cc
@@ -21,8 +21,10 @@
 #include "Adaptor.hh"
 #include "ByteRLE.hh"
 #include "ColumnReader.hh"
-#include "orc/Exceptions.hh"
+#include "ConvertColumnReader.hh"
 #include "RLE.hh"
+#include "SchemaEvolution.hh"
+#include "orc/Exceptions.hh"
 
 #include <math.h>
 #include <iostream>
@@ -35,25 +37,25 @@ namespace orc {
 
   inline RleVersion convertRleVersion(proto::ColumnEncoding_Kind kind) {
     switch (static_cast<int64_t>(kind)) {
-    case proto::ColumnEncoding_Kind_DIRECT:
-    case proto::ColumnEncoding_Kind_DICTIONARY:
-      return RleVersion_1;
-    case proto::ColumnEncoding_Kind_DIRECT_V2:
-    case proto::ColumnEncoding_Kind_DICTIONARY_V2:
-      return RleVersion_2;
-    default:
-      throw ParseError("Unknown encoding in convertRleVersion");
+      case proto::ColumnEncoding_Kind_DIRECT:
+      case proto::ColumnEncoding_Kind_DICTIONARY:
+        return RleVersion_1;
+      case proto::ColumnEncoding_Kind_DIRECT_V2:
+      case proto::ColumnEncoding_Kind_DICTIONARY_V2:
+        return RleVersion_2;
+      default:
+        throw ParseError("Unknown encoding in convertRleVersion");
     }
   }
 
-  ColumnReader::ColumnReader(const Type& type,
-                             StripeStreams& stripe
-                             ): columnId(type.getColumnId()),
-                                memoryPool(stripe.getMemoryPool()) {
+  ColumnReader::ColumnReader(const Type& type, StripeStreams& stripe)
+      : columnId(type.getColumnId()),
+        memoryPool(stripe.getMemoryPool()),
+        metrics(stripe.getReaderMetrics()) {
     std::unique_ptr<SeekableInputStream> stream =
-      stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
+        stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
     if (stream.get()) {
-      notNullDecoder = createBooleanRleDecoder(std::move(stream));
+      notNullDecoder = createBooleanRleDecoder(std::move(stream), metrics);
     }
   }
 
@@ -67,17 +69,14 @@ namespace orc {
       // page through the values that we want to skip
       // and count how many are non-null
       const size_t MAX_BUFFER_SIZE = 32768;
-      size_t bufferSize = std::min(MAX_BUFFER_SIZE,
-                                   static_cast<size_t>(numValues));
+      size_t bufferSize = std::min(MAX_BUFFER_SIZE, static_cast<size_t>(numValues));
       char buffer[MAX_BUFFER_SIZE];
       uint64_t remaining = numValues;
       while (remaining > 0) {
-        uint64_t chunkSize =
-          std::min(remaining,
-                   static_cast<uint64_t>(bufferSize));
+        uint64_t chunkSize = std::min(remaining, static_cast<uint64_t>(bufferSize));
         decoder->next(buffer, chunkSize, nullptr);
         remaining -= chunkSize;
-        for(uint64_t i=0; i < chunkSize; ++i) {
+        for (uint64_t i = 0; i < chunkSize; ++i) {
           if (!buffer[i]) {
             numValues -= 1;
           }
@@ -87,9 +86,7 @@ namespace orc {
     return numValues;
   }
 
-  void ColumnReader::next(ColumnVectorBatch& rowBatch,
-                          uint64_t numValues,
-                          char* incomingMask) {
+  void ColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* incomingMask) {
     if (numValues > rowBatch.capacity) {
       rowBatch.resize(numValues);
     }
@@ -99,7 +96,7 @@ namespace orc {
       char* notNullArray = rowBatch.notNull.data();
       decoder->next(notNullArray, numValues, incomingMask);
       // check to see if there are nulls in this batch
-      for(uint64_t i=0; i < numValues; ++i) {
+      for (uint64_t i = 0; i < numValues; ++i) {
         if (!notNullArray[i]) {
           rowBatch.hasNulls = true;
           return;
@@ -114,240 +111,195 @@ namespace orc {
     rowBatch.hasNulls = false;
   }
 
-  void ColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+  void ColumnReader::seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) {
     if (notNullDecoder.get()) {
       notNullDecoder->seek(positions.at(columnId));
     }
   }
 
   /**
-   * Expand an array of bytes in place to the corresponding array of longs.
+   * Expand an array of bytes in place to the corresponding array of integer.
    * Has to work backwards so that they data isn't clobbered during the
    * expansion.
    * @param buffer the array of chars and array of longs that need to be
    *        expanded
    * @param numValues the number of bytes to convert to longs
    */
-  void expandBytesToLongs(int64_t* buffer, uint64_t numValues) {
-    for(size_t i=numValues - 1; i < numValues; --i) {
-      buffer[i] = reinterpret_cast<char *>(buffer)[i];
+  template <typename T>
+  void expandBytesToIntegers(T* buffer, uint64_t numValues) {
+    if (sizeof(T) == sizeof(int8_t)) {
+      return;
+    }
+    for (uint64_t i = 0UL; i < numValues; ++i) {
+      buffer[numValues - 1 - i] = reinterpret_cast<int8_t*>(buffer)[numValues - 1 - i];
     }
   }
 
-  class BooleanColumnReader: public ColumnReader {
-  private:
+  template <typename BatchType>
+  class BooleanColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<orc::ByteRleDecoder> rle;
 
-  public:
+   public:
     BooleanColumnReader(const Type& type, StripeStreams& stipe);
     ~BooleanColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char* notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
   };
 
-  BooleanColumnReader::BooleanColumnReader(const Type& type,
-                                           StripeStreams& stripe
-                                           ): ColumnReader(type, stripe){
+  template <typename BatchType>
+  BooleanColumnReader<BatchType>::BooleanColumnReader(const Type& type, StripeStreams& stripe)
+      : ColumnReader(type, stripe) {
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (stream == nullptr)
-      throw ParseError("DATA stream not found in Boolean column");
-    rle = createBooleanRleDecoder(std::move(stream));
+    if (stream == nullptr) throw ParseError("DATA stream not found in Boolean column");
+    rle = createBooleanRleDecoder(std::move(stream), metrics);
   }
 
-  BooleanColumnReader::~BooleanColumnReader() {
+  template <typename BatchType>
+  BooleanColumnReader<BatchType>::~BooleanColumnReader() {
     // PASS
   }
 
-  uint64_t BooleanColumnReader::skip(uint64_t numValues) {
+  template <typename BatchType>
+  uint64_t BooleanColumnReader<BatchType>::skip(uint64_t numValues) {
     numValues = ColumnReader::skip(numValues);
     rle->skip(numValues);
     return numValues;
   }
 
-  void BooleanColumnReader::next(ColumnVectorBatch& rowBatch,
-                                 uint64_t numValues,
-                                 char *notNull) {
+  template <typename BatchType>
+  void BooleanColumnReader<BatchType>::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                            char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
-    // Since the byte rle places the output in a char* instead of long*,
-    // we cheat here and use the long* and then expand it in a second pass.
-    int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data();
-    rle->next(reinterpret_cast<char*>(ptr),
-              numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
-    expandBytesToLongs(ptr, numValues);
+    // Since the byte rle places the output in a char* and BatchType here may be
+    // LongVectorBatch with long*. We cheat here in that case and use the long*
+    // and then expand it in a second pass..
+    auto* ptr = dynamic_cast<BatchType&>(rowBatch).data.data();
+    rle->next(reinterpret_cast<char*>(ptr), numValues,
+              rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
+    expandBytesToIntegers(ptr, numValues);
   }
 
-  void BooleanColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+  template <typename BatchType>
+  void BooleanColumnReader<BatchType>::seekToRowGroup(
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     rle->seek(positions.at(columnId));
   }
 
-  class ByteColumnReader: public ColumnReader {
-  private:
+  template <typename BatchType>
+  class ByteColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<orc::ByteRleDecoder> rle;
 
-  public:
-    ByteColumnReader(const Type& type, StripeStreams& stipe);
-    ~ByteColumnReader() override;
-
-    uint64_t skip(uint64_t numValues) override;
-
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char* notNull) override;
-
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
-  };
-
-  ByteColumnReader::ByteColumnReader(const Type& type,
-                                           StripeStreams& stripe
-                                           ): ColumnReader(type, stripe){
-    std::unique_ptr<SeekableInputStream> stream =
-        stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (stream == nullptr)
-      throw ParseError("DATA stream not found in Byte column");
-    rle = createByteRleDecoder(std::move(stream));
-  }
+   public:
+    ByteColumnReader(const Type& type, StripeStreams& stripe) : ColumnReader(type, stripe) {
+      std::unique_ptr<SeekableInputStream> stream =
+          stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+      if (stream == nullptr) throw ParseError("DATA stream not found in Byte column");
+      rle = createByteRleDecoder(std::move(stream), metrics);
+    }
 
-  ByteColumnReader::~ByteColumnReader() {
-    // PASS
-  }
+    ~ByteColumnReader() override = default;
 
-  uint64_t ByteColumnReader::skip(uint64_t numValues) {
-    numValues = ColumnReader::skip(numValues);
-    rle->skip(numValues);
-    return numValues;
-  }
+    uint64_t skip(uint64_t numValues) override {
+      numValues = ColumnReader::skip(numValues);
+      rle->skip(numValues);
+      return numValues;
+    }
 
-  void ByteColumnReader::next(ColumnVectorBatch& rowBatch,
-                              uint64_t numValues,
-                              char *notNull) {
-    ColumnReader::next(rowBatch, numValues, notNull);
-    // Since the byte rle places the output in a char* instead of long*,
-    // we cheat here and use the long* and then expand it in a second pass.
-    int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data();
-    rle->next(reinterpret_cast<char*>(ptr),
-              numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
-    expandBytesToLongs(ptr, numValues);
-  }
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ColumnReader::next(rowBatch, numValues, notNull);
+      // Since the byte rle places the output in a char* instead of the batch's element type,
+      // we cheat here: decode into the batch's own buffer, then expand it in a second pass.
+      auto* ptr = dynamic_cast<BatchType&>(rowBatch).data.data();
+      rle->next(reinterpret_cast<char*>(ptr), numValues,
+                rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
+      expandBytesToIntegers(ptr, numValues);
+    }
 
-  void ByteColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
-    ColumnReader::seekToRowGroup(positions);
-    rle->seek(positions.at(columnId));
-  }
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override {
+      ColumnReader::seekToRowGroup(positions);
+      rle->seek(positions.at(columnId));
+    }
+  };
 
-  class IntegerColumnReader: public ColumnReader {
-  protected:
+  template <typename BatchType>
+  class IntegerColumnReader : public ColumnReader {
+   protected:
     std::unique_ptr<orc::RleDecoder> rle;
 
-  public:
-    IntegerColumnReader(const Type& type, StripeStreams& stripe);
-    ~IntegerColumnReader() override;
-
-    uint64_t skip(uint64_t numValues) override;
-
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char* notNull) override;
-
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
-  };
-
-  IntegerColumnReader::IntegerColumnReader(const Type& type,
-                                           StripeStreams& stripe
-                                           ): ColumnReader(type, stripe) {
-    RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
-    std::unique_ptr<SeekableInputStream> stream =
-        stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (stream == nullptr)
-      throw ParseError("DATA stream not found in Integer column");
-    rle = createRleDecoder(std::move(stream), true, vers, memoryPool);
-  }
+   public:
+    IntegerColumnReader(const Type& type, StripeStreams& stripe) : ColumnReader(type, stripe) {
+      RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
+      std::unique_ptr<SeekableInputStream> stream =
+          stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+      if (stream == nullptr) throw ParseError("DATA stream not found in Integer column");
+      rle = createRleDecoder(std::move(stream), true, vers, memoryPool, metrics);
+    }
 
-  IntegerColumnReader::~IntegerColumnReader() {
-    // PASS
-  }
+    ~IntegerColumnReader() override {
+      // Nothing to release by hand: rle is a std::unique_ptr and cleans itself up.
+    }
 
-  uint64_t IntegerColumnReader::skip(uint64_t numValues) {
-    numValues = ColumnReader::skip(numValues);
-    rle->skip(numValues);
-    return numValues;
-  }
+    uint64_t skip(uint64_t numValues) override {
+      numValues = ColumnReader::skip(numValues);
+      rle->skip(numValues);
+      return numValues;
+    }
 
-  void IntegerColumnReader::next(ColumnVectorBatch& rowBatch,
-                                 uint64_t numValues,
-                                 char *notNull) {
-    ColumnReader::next(rowBatch, numValues, notNull);
-    rle->next(dynamic_cast<LongVectorBatch&>(rowBatch).data.data(),
-              numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
-  }
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ColumnReader::next(rowBatch, numValues, notNull);
+      rle->next(dynamic_cast<BatchType&>(rowBatch).data.data(), numValues,
+                rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr);
+    }
 
-  void IntegerColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
-    ColumnReader::seekToRowGroup(positions);
-    rle->seek(positions.at(columnId));
-  }
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override {
+      ColumnReader::seekToRowGroup(positions);
+      rle->seek(positions.at(columnId));
+    }
+  };
 
-  class TimestampColumnReader: public ColumnReader {
-  private:
+  class TimestampColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<orc::RleDecoder> secondsRle;
     std::unique_ptr<orc::RleDecoder> nanoRle;
-    const Timezone& writerTimezone;
-    const Timezone& readerTimezone;
+    const Timezone* writerTimezone;
+    const Timezone* readerTimezone;
     const int64_t epochOffset;
     const bool sameTimezone;
 
-  public:
-    TimestampColumnReader(const Type& type,
-                          StripeStreams& stripe,
-                          bool isInstantType);
+   public:
+    TimestampColumnReader(const Type& type, StripeStreams& stripe, bool isInstantType);
     ~TimestampColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char* notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
   };
 
-
-  TimestampColumnReader::TimestampColumnReader(const Type& type,
-                                               StripeStreams& stripe,
-                                               bool isInstantType
-                               ): ColumnReader(type, stripe),
-                                  writerTimezone(isInstantType ?
-                                                 getTimezoneByName("GMT") :
-                                                 stripe.getWriterTimezone()),
-                                  readerTimezone(isInstantType ?
-                                                 getTimezoneByName("GMT") :
-                                                 stripe.getReaderTimezone()),
-                                  epochOffset(writerTimezone.getEpoch()),
-                                  sameTimezone(&writerTimezone == &readerTimezone){
+  TimestampColumnReader::TimestampColumnReader(const Type& type, StripeStreams& stripe,
+                                               bool isInstantType)
+      : ColumnReader(type, stripe),
+        writerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getWriterTimezone()),
+        readerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getReaderTimezone()),
+        epochOffset(writerTimezone->getEpoch()),
+        sameTimezone(writerTimezone == readerTimezone) {
     RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (stream == nullptr)
-      throw ParseError("DATA stream not found in Timestamp column");
-    secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool);
+    if (stream == nullptr) throw ParseError("DATA stream not found in Timestamp column");
+    secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool, metrics);
     stream = stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true);
-    if (stream == nullptr)
-      throw ParseError("SECONDARY stream not found in Timestamp column");
-    nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool);
+    if (stream == nullptr) throw ParseError("SECONDARY stream not found in Timestamp column");
+    nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics);
   }
 
   TimestampColumnReader::~TimestampColumnReader() {
@@ -361,25 +313,22 @@ namespace orc {
     return numValues;
   }
 
-  void TimestampColumnReader::next(ColumnVectorBatch& rowBatch,
-                                   uint64_t numValues,
-                                   char *notNull) {
+  void TimestampColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    TimestampVectorBatch& timestampBatch =
-      dynamic_cast<TimestampVectorBatch&>(rowBatch);
-    int64_t *secsBuffer = timestampBatch.data.data();
+    TimestampVectorBatch& timestampBatch = dynamic_cast<TimestampVectorBatch&>(rowBatch);
+    int64_t* secsBuffer = timestampBatch.data.data();
     secondsRle->next(secsBuffer, numValues, notNull);
-    int64_t *nanoBuffer = timestampBatch.nanoseconds.data();
+    int64_t* nanoBuffer = timestampBatch.nanoseconds.data();
     nanoRle->next(nanoBuffer, numValues, notNull);
 
     // Construct the values
-    for(uint64_t i=0; i < numValues; i++) {
+    for (uint64_t i = 0; i < numValues; i++) {
       if (notNull == nullptr || notNull[i]) {
         uint64_t zeros = nanoBuffer[i] & 0x7;
         nanoBuffer[i] >>= 3;
         if (zeros != 0) {
-          for(uint64_t j = 0; j <= zeros; ++j) {
+          for (uint64_t j = 0; j <= zeros; ++j) {
             nanoBuffer[i] *= 10;
           }
         }
@@ -387,13 +336,13 @@ namespace orc {
         if (!sameTimezone) {
           // adjust timestamp value to same wall clock time if writer and reader
           // time zones have different rules, which is required for Apache Orc.
-          const auto& wv = writerTimezone.getVariant(writerTime);
-          const auto& rv = readerTimezone.getVariant(writerTime);
+          const auto& wv = writerTimezone->getVariant(writerTime);
+          const auto& rv = readerTimezone->getVariant(writerTime);
           if (!wv.hasSameTzRule(rv)) {
             // If the timezone adjustment moves the millis across a DST boundary,
             // we need to reevaluate the offsets.
             int64_t adjustedTime = writerTime + wv.gmtOffset - rv.gmtOffset;
-            const auto& adjustedReader = readerTimezone.getVariant(adjustedTime);
+            const auto& adjustedReader = readerTimezone->getVariant(adjustedTime);
             writerTime = writerTime + wv.gmtOffset - adjustedReader.gmtOffset;
           }
         }
@@ -406,38 +355,34 @@ namespace orc {
   }
 
   void TimestampColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     secondsRle->seek(positions.at(columnId));
     nanoRle->seek(positions.at(columnId));
   }
 
-  template<TypeKind columnKind, bool isLittleEndian>
-  class DoubleColumnReader: public ColumnReader {
-  public:
+  template <TypeKind columnKind, bool isLittleEndian, typename ValueType, typename BatchType>
+  class DoubleColumnReader : public ColumnReader {
+   public:
     DoubleColumnReader(const Type& type, StripeStreams& stripe);
     ~DoubleColumnReader() override {}
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char* notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
 
-  private:
+   private:
     std::unique_ptr<SeekableInputStream> inputStream;
     const uint64_t bytesPerValue = (columnKind == FLOAT) ? 4 : 8;
-    const char *bufferPointer;
-    const char *bufferEnd;
+    const char* bufferPointer;
+    const char* bufferEnd;
 
     unsigned char readByte() {
       if (bufferPointer == bufferEnd) {
         int length;
-        if (!inputStream->Next
-            (reinterpret_cast<const void**>(&bufferPointer), &length)) {
+        if (!inputStream->Next(reinterpret_cast<const void**>(&bufferPointer), &length)) {
           throw ParseError("bad read in DoubleColumnReader::next()");
         }
         bufferEnd = bufferPointer + length;
@@ -445,7 +390,8 @@ namespace orc {
       return static_cast<unsigned char>(*(bufferPointer++));
     }
 
-    double readDouble() {
+    template <typename FloatType>
+    FloatType readDouble() {
       int64_t bits = 0;
       if (bufferEnd - bufferPointer >= 8) {
         if (isLittleEndian) {
@@ -466,11 +412,12 @@ namespace orc {
           bits |= static_cast<int64_t>(readByte()) << (i * 8);
         }
       }
-      double *result = reinterpret_cast<double*>(&bits);
+      FloatType* result = reinterpret_cast<FloatType*>(&bits);
       return *result;
     }
 
-    double readFloat() {
+    template <typename FloatType>
+    FloatType readFloat() {
       int32_t bits = 0;
       if (bufferEnd - bufferPointer >= 4) {
         if (isLittleEndian) {
@@ -487,33 +434,32 @@ namespace orc {
           bits |= readByte() << (i * 8);
         }
       }
-      float *result = reinterpret_cast<float*>(&bits);
-      return static_cast<double>(*result);
+      float* result = reinterpret_cast<float*>(&bits);
+      if (!result) {
+        std::cerr << "read float empty." << std::endl;
+      }
+      return static_cast<FloatType>(*result);
     }
   };
 
-  template<TypeKind columnKind, bool isLittleEndian>
-  DoubleColumnReader<columnKind, isLittleEndian>::DoubleColumnReader(
-      const Type& type,
-      StripeStreams& stripe
-      ): ColumnReader(type, stripe),
-         bufferPointer(nullptr),
-         bufferEnd(nullptr) {
+  template <TypeKind columnKind, bool isLittleEndian, typename ValueType, typename BatchType>
+  DoubleColumnReader<columnKind, isLittleEndian, ValueType, BatchType>::DoubleColumnReader(
+      const Type& type, StripeStreams& stripe)
+      : ColumnReader(type, stripe), bufferPointer(nullptr), bufferEnd(nullptr) {
     inputStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (inputStream == nullptr)
-      throw ParseError("DATA stream not found in Double column");
+    if (inputStream == nullptr) throw ParseError("DATA stream not found in Double column");
   }
 
-  template<TypeKind columnKind, bool isLittleEndian>
-  uint64_t DoubleColumnReader<columnKind, isLittleEndian>::skip(uint64_t numValues) {
+  template <TypeKind columnKind, bool isLittleEndian, typename ValueType, typename BatchType>
+  uint64_t DoubleColumnReader<columnKind, isLittleEndian, ValueType, BatchType>::skip(
+      uint64_t numValues) {
     numValues = ColumnReader::skip(numValues);
 
-    if (static_cast<size_t>(bufferEnd - bufferPointer) >=
-        bytesPerValue * numValues) {
+    if (static_cast<size_t>(bufferEnd - bufferPointer) >= bytesPerValue * numValues) {
       bufferPointer += bytesPerValue * numValues;
     } else {
-      size_t sizeToSkip = bytesPerValue * numValues -
-                          static_cast<size_t>(bufferEnd - bufferPointer);
+      size_t sizeToSkip =
+          bytesPerValue * numValues - static_cast<size_t>(bufferEnd - bufferPointer);
       const size_t cap = static_cast<size_t>(std::numeric_limits<int>::max());
       while (sizeToSkip != 0) {
         size_t step = sizeToSkip > cap ? cap : sizeToSkip;
@@ -527,33 +473,32 @@ namespace orc {
     return numValues;
   }
 
-  template<TypeKind columnKind, bool isLittleEndian>
-  void DoubleColumnReader<columnKind, isLittleEndian>::next(
-      ColumnVectorBatch& rowBatch,
-      uint64_t numValues,
-      char *notNull) {
+  template <TypeKind columnKind, bool isLittleEndian, typename ValueType, typename BatchType>
+  void DoubleColumnReader<columnKind, isLittleEndian, ValueType, BatchType>::next(
+      ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     // update the notNull from the parent class
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    double* outArray = dynamic_cast<DoubleVectorBatch&>(rowBatch).data.data();
+    ValueType* outArray =
+        reinterpret_cast<ValueType*>(dynamic_cast<BatchType&>(rowBatch).data.data());
 
-    if (columnKind == FLOAT) {
+    if constexpr (columnKind == FLOAT) {
       if (notNull) {
-        for(size_t i=0; i < numValues; ++i) {
+        for (size_t i = 0; i < numValues; ++i) {
           if (notNull[i]) {
-            outArray[i] = readFloat();
+            outArray[i] = readFloat<ValueType>();
           }
         }
       } else {
-        for(size_t i=0; i < numValues; ++i) {
-          outArray[i] = readFloat();
+        for (size_t i = 0; i < numValues; ++i) {
+          outArray[i] = readFloat<ValueType>();
         }
       }
     } else {
       if (notNull) {
-        for(size_t i=0; i < numValues; ++i) {
+        for (size_t i = 0; i < numValues; ++i) {
           if (notNull[i]) {
-            outArray[i] = readDouble();
+            outArray[i] = readDouble<ValueType>();
           }
         }
       } else {
@@ -561,25 +506,23 @@ namespace orc {
         // Only viable when the machine is little-endian.
         uint64_t bufferNum = 0;
         if (isLittleEndian) {
-          bufferNum = std::min(numValues,
-              static_cast<size_t>(bufferEnd - bufferPointer) / bytesPerValue);
+          bufferNum =
+              std::min(numValues, static_cast<size_t>(bufferEnd - bufferPointer) / bytesPerValue);
           uint64_t bufferBytes = bufferNum * bytesPerValue;
-          if (bufferPointer && bufferBytes) {
+          if (bufferBytes > 0) {
             memcpy(outArray, bufferPointer, bufferBytes);
             bufferPointer += bufferBytes;
-          } else {
-            bufferNum = 0;
           }
         }
         for (size_t i = bufferNum; i < numValues; ++i) {
-          outArray[i] = readDouble();
+          outArray[i] = readDouble<ValueType>();
         }
       }
     }
   }
 
-  template<TypeKind columnKind, bool isLittleEndian>
-  void DoubleColumnReader<columnKind, isLittleEndian>::seekToRowGroup(
+  template <TypeKind columnKind, bool isLittleEndian, typename ValueType, typename BatchType>
+  void DoubleColumnReader<columnKind, isLittleEndian, ValueType, BatchType>::seekToRowGroup(
       std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     inputStream->seek(positions.at(columnId));
@@ -604,54 +547,46 @@ namespace orc {
     }
   }
 
-  class StringDictionaryColumnReader: public ColumnReader {
-  private:
+  class StringDictionaryColumnReader : public ColumnReader {
+   private:
     std::shared_ptr<StringDictionary> dictionary;
     std::unique_ptr<RleDecoder> rle;
 
-  public:
+   public:
     StringDictionaryColumnReader(const Type& type, StripeStreams& stipe);
     ~StringDictionaryColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void nextEncoded(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char* notNull) override;
+    void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
   };
 
-  StringDictionaryColumnReader::StringDictionaryColumnReader
-             (const Type& type,
-              StripeStreams& stripe
-              ): ColumnReader(type, stripe),
-                 dictionary(new StringDictionary(stripe.getMemoryPool())) {
-    RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
-                                                .kind());
-    uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize();
+  StringDictionaryColumnReader::StringDictionaryColumnReader(const Type& type,
+                                                             StripeStreams& stripe)
+      : ColumnReader(type, stripe), dictionary(new StringDictionary(stripe.getMemoryPool())) {
+    RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId).kind());
+    uint32_t dictSize = stripe.getEncoding(columnId).dictionary_size();
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
     if (stream == nullptr) {
       throw ParseError("DATA stream not found in StringDictionaryColumn");
     }
-    rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
+    rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics);
     stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, false);
     if (dictSize > 0 && stream == nullptr) {
       throw ParseError("LENGTH stream not found in StringDictionaryColumn");
     }
     std::unique_ptr<RleDecoder> lengthDecoder =
-        createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
+        createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics);
     dictionary->dictionaryOffset.resize(dictSize + 1);
     int64_t* lengthArray = dictionary->dictionaryOffset.data();
     lengthDecoder->next(lengthArray + 1, dictSize, nullptr);
     lengthArray[0] = 0;
-    for(uint32_t i = 1; i < dictSize + 1; ++i) {
+    for (uint32_t i = 1; i < dictSize + 1; ++i) {
       if (lengthArray[i] < 0) {
         throw ParseError("Negative dictionary entry length");
       }
@@ -660,10 +595,9 @@ namespace orc {
     int64_t blobSize = lengthArray[dictSize];
     dictionary->dictionaryBlob.resize(static_cast<uint64_t>(blobSize));
     std::unique_ptr<SeekableInputStream> blobStream =
-      stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false);
+        stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false);
     if (blobSize > 0 && blobStream == nullptr) {
-      throw ParseError(
-          "DICTIONARY_DATA stream not found in StringDictionaryColumn");
+      throw ParseError("DICTIONARY_DATA stream not found in StringDictionaryColumn");
     }
     readFully(dictionary->dictionaryBlob.data(), blobSize, blobStream.get());
   }
@@ -678,47 +612,43 @@ namespace orc {
     return numValues;
   }
 
-  void StringDictionaryColumnReader::next(ColumnVectorBatch& rowBatch,
-                                          uint64_t numValues,
-                                          char *notNull) {
+  void StringDictionaryColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                          char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     // update the notNull from the parent class
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
     StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch);
-    char *blob = dictionary->dictionaryBlob.data();
-    int64_t *dictionaryOffsets = dictionary->dictionaryOffset.data();
-    char **outputStarts = byteBatch.data.data();
-    int64_t *outputLengths = byteBatch.length.data();
+    char* blob = dictionary->dictionaryBlob.data();
+    int64_t* dictionaryOffsets = dictionary->dictionaryOffset.data();
+    char** outputStarts = byteBatch.data.data();
+    int64_t* outputLengths = byteBatch.length.data();
     rle->next(outputLengths, numValues, notNull);
     uint64_t dictionaryCount = dictionary->dictionaryOffset.size() - 1;
     if (notNull) {
-      for(uint64_t i=0; i < numValues; ++i) {
+      for (uint64_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           int64_t entry = outputLengths[i];
-          if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount ) {
+          if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount) {
             throw ParseError("Entry index out of range in StringDictionaryColumn");
           }
           outputStarts[i] = blob + dictionaryOffsets[entry];
-          outputLengths[i] = dictionaryOffsets[entry+1] -
-            dictionaryOffsets[entry];
+          outputLengths[i] = dictionaryOffsets[entry + 1] - dictionaryOffsets[entry];
         }
       }
     } else {
-      for(uint64_t i=0; i < numValues; ++i) {
+      for (uint64_t i = 0; i < numValues; ++i) {
         int64_t entry = outputLengths[i];
         if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount) {
           throw ParseError("Entry index out of range in StringDictionaryColumn");
         }
         outputStarts[i] = blob + dictionaryOffsets[entry];
-        outputLengths[i] = dictionaryOffsets[entry+1] -
-          dictionaryOffsets[entry];
+        outputLengths[i] = dictionaryOffsets[entry + 1] - dictionaryOffsets[entry];
       }
     }
   }
 
-  void StringDictionaryColumnReader::nextEncoded(ColumnVectorBatch& rowBatch,
-                                                  uint64_t numValues,
-                                                  char* notNull) {
+  void StringDictionaryColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                                 char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
     rowBatch.isEncoded = true;
@@ -731,17 +661,16 @@ namespace orc {
   }
 
   void StringDictionaryColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     rle->seek(positions.at(columnId));
   }
 
-
-  class StringDirectColumnReader: public ColumnReader {
-  private:
+  class StringDirectColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<RleDecoder> lengthRle;
     std::unique_ptr<SeekableInputStream> blobStream;
-    const char *lastBuffer;
+    const char* lastBuffer;
     size_t lastBufferLength;
 
     /**
@@ -751,38 +680,28 @@ namespace orc {
      * @param numValues the lengths of the arrays
      * @return the total number of bytes for the non-null values
      */
-    size_t computeSize(const int64_t *lengths, const char *notNull,
-                       uint64_t numValues);
+    size_t computeSize(const int64_t* lengths, const char* notNull, uint64_t numValues);
 
-  public:
+   public:
     StringDirectColumnReader(const Type& type, StripeStreams& stipe);
     ~StringDirectColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
   };
 
-  StringDirectColumnReader::StringDirectColumnReader
-                 (const Type& type,
-                  StripeStreams& stripe
-                  ): ColumnReader(type, stripe) {
-    RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
-                                                .kind());
+  StringDirectColumnReader::StringDirectColumnReader(const Type& type, StripeStreams& stripe)
+      : ColumnReader(type, stripe) {
+    RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
-    if (stream == nullptr)
-      throw ParseError("LENGTH stream not found in StringDirectColumn");
-    lengthRle = createRleDecoder(
-        std::move(stream), false, rleVersion, memoryPool);
+    if (stream == nullptr) throw ParseError("LENGTH stream not found in StringDirectColumn");
+    lengthRle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics);
     blobStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (blobStream == nullptr)
-      throw ParseError("DATA stream not found in StringDirectColumn");
+    if (blobStream == nullptr) throw ParseError("DATA stream not found in StringDirectColumn");
     lastBuffer = nullptr;
     lastBufferLength = 0;
   }
@@ -799,8 +718,7 @@ namespace orc {
     size_t totalBytes = 0;
     // read the lengths, so we know haw many bytes to skip
     while (done < numValues) {
-      uint64_t step = std::min(BUFFER_SIZE,
-                                    static_cast<size_t>(numValues - done));
+      uint64_t step = std::min(BUFFER_SIZE, static_cast<size_t>(numValues - done));
       lengthRle->next(buffer, step, nullptr);
       totalBytes += computeSize(buffer, nullptr, step);
       done += step;
@@ -824,33 +742,31 @@ namespace orc {
     return numValues;
   }
 
-  size_t StringDirectColumnReader::computeSize(const int64_t* lengths,
-                                               const char* notNull,
+  size_t StringDirectColumnReader::computeSize(const int64_t* lengths, const char* notNull,
                                                uint64_t numValues) {
     size_t totalLength = 0;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           totalLength += static_cast<size_t>(lengths[i]);
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         totalLength += static_cast<size_t>(lengths[i]);
       }
     }
     return totalLength;
   }
 
-  void StringDirectColumnReader::next(ColumnVectorBatch& rowBatch,
-                                      uint64_t numValues,
-                                      char *notNull) {
+  void StringDirectColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                      char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     // update the notNull from the parent class
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
     StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch);
-    char **startPtr = byteBatch.data.data();
-    int64_t *lengthPtr = byteBatch.length.data();
+    char** startPtr = byteBatch.data.data();
+    int64_t* lengthPtr = byteBatch.length.data();
 
     // read the length vector
     lengthRle->next(lengthPtr, numValues, notNull);
@@ -862,7 +778,7 @@ namespace orc {
     // to get the rest directly out of the stream's buffer.
     size_t bytesBuffered = 0;
     byteBatch.blob.resize(totalLength);
-    char *ptr= byteBatch.blob.data();
+    char* ptr = byteBatch.blob.data();
     while (bytesBuffered + lastBufferLength < totalLength) {
       memcpy(ptr + bytesBuffered, lastBuffer, lastBufferLength);
       bytesBuffered += lastBufferLength;
@@ -902,7 +818,7 @@ namespace orc {
   }
 
   void StringDirectColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     blobStream->seek(positions.at(columnId));
     lengthRle->seek(positions.at(columnId));
@@ -911,145 +827,130 @@ namespace orc {
     lastBufferLength = 0;
   }
 
-  class StructColumnReader: public ColumnReader {
-  private:
+  class StructColumnReader : public ColumnReader {
+   private:
     std::vector<std::unique_ptr<ColumnReader>> children;
 
-  public:
-    StructColumnReader(const Type& type, StripeStreams& stipe);
+   public:
+    StructColumnReader(const Type& type, StripeStreams& stripe, bool useTightNumericVector = false,
+                       bool throwOnSchemaEvolutionOverflow = false);
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void nextEncoded(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
 
-  private:
-    template<bool encoded>
-    void nextInternal(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char *notNull);
+   private:
+    template <bool encoded>
+    void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull);
   };
 
-  StructColumnReader::StructColumnReader(const Type& type,
-                                         StripeStreams& stripe
-                                         ): ColumnReader(type, stripe) {
+  StructColumnReader::StructColumnReader(const Type& type, StripeStreams& stripe,
+                                         bool useTightNumericVector,
+                                         bool throwOnSchemaEvolutionOverflow)
+      : ColumnReader(type, stripe) {
     // count the number of selected sub-columns
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
     switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) {
-    case proto::ColumnEncoding_Kind_DIRECT:
-      for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
-        const Type& child = *type.getSubtype(i);
-        if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) {
-          children.push_back(buildReader(child, stripe));
+      case proto::ColumnEncoding_Kind_DIRECT:
+        for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
+          const Type& child = *type.getSubtype(i);
+          if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) {
+            children.push_back(
+                buildReader(child, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow));
+          }
         }
-      }
-      break;
-    case proto::ColumnEncoding_Kind_DIRECT_V2:
-    case proto::ColumnEncoding_Kind_DICTIONARY:
-    case proto::ColumnEncoding_Kind_DICTIONARY_V2:
-    default:
-      throw ParseError("Unknown encoding for StructColumnReader");
+        break;
+      case proto::ColumnEncoding_Kind_DIRECT_V2:
+      case proto::ColumnEncoding_Kind_DICTIONARY:
+      case proto::ColumnEncoding_Kind_DICTIONARY_V2:
+      default:
+        throw ParseError("Unknown encoding for StructColumnReader");
     }
   }
 
   uint64_t StructColumnReader::skip(uint64_t numValues) {
     numValues = ColumnReader::skip(numValues);
-    for(auto& ptr : children) {
+    for (auto& ptr : children) {
       ptr->skip(numValues);
     }
     return numValues;
   }
 
-  void StructColumnReader::next(ColumnVectorBatch& rowBatch,
-                                uint64_t numValues,
-                                char *notNull) {
+  void StructColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     nextInternal<false>(rowBatch, numValues, notNull);
   }
 
-  void StructColumnReader::nextEncoded(ColumnVectorBatch& rowBatch,
-                                uint64_t numValues,
-                                char *notNull) {
+  void StructColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                       char* notNull) {
     nextInternal<true>(rowBatch, numValues, notNull);
   }
 
-  template<bool encoded>
-  void StructColumnReader::nextInternal(ColumnVectorBatch& rowBatch,
-                                uint64_t numValues,
-                                char *notNull) {
+  template <bool encoded>
+  void StructColumnReader::nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                        char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
-    uint64_t i=0;
-    notNull = rowBatch.hasNulls? rowBatch.notNull.data() : nullptr;
-    for(auto iter = children.begin(); iter != children.end(); ++iter, ++i) {
+    uint64_t i = 0;
+    notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
+    for (auto iter = children.begin(); iter != children.end(); ++iter, ++i) {
       if (encoded) {
-        (*iter)->nextEncoded(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]),
-                    numValues, notNull);
+        (*iter)->nextEncoded(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), numValues,
+                             notNull);
       } else {
-        (*iter)->next(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]),
-                    numValues, notNull);
+        (*iter)->next(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), numValues, notNull);
       }
     }
   }
 
   void StructColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
 
-    for(auto& ptr : children) {
+    for (auto& ptr : children) {
       ptr->seekToRowGroup(positions);
     }
   }
 
-  class ListColumnReader: public ColumnReader {
-  private:
+  class ListColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<ColumnReader> child;
     std::unique_ptr<RleDecoder> rle;
 
-  public:
-    ListColumnReader(const Type& type, StripeStreams& stipe);
+   public:
+    ListColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false,
+                     bool throwOnSchemaEvolutionOverflow = false);
     ~ListColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void nextEncoded(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
 
-  private:
-    template<bool encoded>
-    void nextInternal(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char *notNull);
+   private:
+    template <bool encoded>
+    void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull);
   };
 
-  ListColumnReader::ListColumnReader(const Type& type,
-                                     StripeStreams& stripe
-                                     ): ColumnReader(type, stripe) {
+  ListColumnReader::ListColumnReader(const Type& type, StripeStreams& stripe,
+                                     bool useTightNumericVector,
+                                     bool throwOnSchemaEvolutionOverflow)
+      : ColumnReader(type, stripe) {
     // count the number of selected sub-columns
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
     RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
-    if (stream == nullptr)
-      throw ParseError("LENGTH stream not found in List column");
-    rle = createRleDecoder(std::move(stream), false, vers, memoryPool);
+    if (stream == nullptr) throw ParseError("LENGTH stream not found in List column");
+    rle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics);
     const Type& childType = *type.getSubtype(0);
     if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) {
-      child = buildReader(childType, stripe);
+      child = buildReader(childType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow);
     }
   }
 
@@ -1059,7 +960,7 @@ namespace orc {
 
   uint64_t ListColumnReader::skip(uint64_t numValues) {
     numValues = ColumnReader::skip(numValues);
-    ColumnReader *childReader = child.get();
+    ColumnReader* childReader = child.get();
     if (childReader) {
       const uint64_t BUFFER_SIZE = 1024;
       int64_t buffer[BUFFER_SIZE];
@@ -1068,7 +969,7 @@ namespace orc {
       while (lengthsRead < numValues) {
         uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE);
         rle->next(buffer, chunk, nullptr);
-        for(size_t i=0; i < chunk; ++i) {
+        for (size_t i = 0; i < chunk; ++i) {
           childrenElements += static_cast<size_t>(buffer[i]);
         }
         lengthsRead += chunk;
@@ -1080,30 +981,26 @@ namespace orc {
     return numValues;
   }
 
-  void ListColumnReader::next(ColumnVectorBatch& rowBatch,
-                                      uint64_t numValues,
-                                      char *notNull) {
+  void ListColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     nextInternal<false>(rowBatch, numValues, notNull);
   }
 
-  void ListColumnReader::nextEncoded(ColumnVectorBatch& rowBatch,
-                                      uint64_t numValues,
-                                      char *notNull) {
+  void ListColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                     char* notNull) {
     nextInternal<true>(rowBatch, numValues, notNull);
   }
 
-  template<bool encoded>
-  void ListColumnReader::nextInternal(ColumnVectorBatch& rowBatch,
-                              uint64_t numValues,
-                              char *notNull) {
+  template <bool encoded>
+  void ListColumnReader::nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                      char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
-    ListVectorBatch &listBatch = dynamic_cast<ListVectorBatch&>(rowBatch);
+    ListVectorBatch& listBatch = dynamic_cast<ListVectorBatch&>(rowBatch);
     int64_t* offsets = listBatch.offsets.data();
     notNull = listBatch.hasNulls ? listBatch.notNull.data() : nullptr;
     rle->next(offsets, numValues, notNull);
     uint64_t totalChildren = 0;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           uint64_t tmp = static_cast<uint64_t>(offsets[i]);
           offsets[i] = static_cast<int64_t>(totalChildren);
@@ -1113,14 +1010,14 @@ namespace orc {
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         uint64_t tmp = static_cast<uint64_t>(offsets[i]);
         offsets[i] = static_cast<int64_t>(totalChildren);
         totalChildren += tmp;
       }
     }
     offsets[numValues] = static_cast<int64_t>(totalChildren);
-    ColumnReader *childReader = child.get();
+    ColumnReader* childReader = child.get();
     if (childReader) {
       if (encoded) {
         childReader->nextEncoded(*(listBatch.elements.get()), totalChildren, nullptr);
@@ -1130,8 +1027,7 @@ namespace orc {
     }
   }
 
-  void ListColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+  void ListColumnReader::seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     rle->seek(positions.at(columnId));
     if (child.get()) {
@@ -1139,54 +1035,49 @@ namespace orc {
     }
   }
 
-  class MapColumnReader: public ColumnReader {
-  private:
+  class MapColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<ColumnReader> keyReader;
     std::unique_ptr<ColumnReader> elementReader;
     std::unique_ptr<RleDecoder> rle;
 
-  public:
-    MapColumnReader(const Type& type, StripeStreams& stipe);
+   public:
+    MapColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false,
+                    bool throwOnSchemaEvolutionOverflow = false);
     ~MapColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void nextEncoded(ColumnVectorBatch& rowBatch,
-                     uint64_t numValues,
-                     char *notNull) override;
+    void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
 
-  private:
-    template<bool encoded>
-    void nextInternal(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char *notNull);
+   private:
+    template <bool encoded>
+    void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull);
   };
 
-  MapColumnReader::MapColumnReader(const Type& type,
-                                   StripeStreams& stripe
-                                   ): ColumnReader(type, stripe) {
+  MapColumnReader::MapColumnReader(const Type& type, StripeStreams& stripe,
+                                   bool useTightNumericVector, bool throwOnSchemaEvolutionOverflow)
+      : ColumnReader(type, stripe) {
     // Determine if the key and/or value columns are selected
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
     RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
-    if (stream == nullptr)
-      throw ParseError("LENGTH stream not found in Map column");
-    rle = createRleDecoder(std::move(stream), false, vers, memoryPool);
+    if (stream == nullptr) throw ParseError("LENGTH stream not found in Map column");
+    rle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics);
     const Type& keyType = *type.getSubtype(0);
     if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) {
-      keyReader = buildReader(keyType, stripe);
+      keyReader =
+          buildReader(keyType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow);
     }
     const Type& elementType = *type.getSubtype(1);
     if (selectedColumns[static_cast<uint64_t>(elementType.getColumnId())]) {
-      elementReader = buildReader(elementType, stripe);
+      elementReader =
+          buildReader(elementType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow);
     }
   }
 
@@ -1196,8 +1087,8 @@ namespace orc {
 
   uint64_t MapColumnReader::skip(uint64_t numValues) {
     numValues = ColumnReader::skip(numValues);
-    ColumnReader *rawKeyReader = keyReader.get();
-    ColumnReader *rawElementReader = elementReader.get();
+    ColumnReader* rawKeyReader = keyReader.get();
+    ColumnReader* rawElementReader = elementReader.get();
     if (rawKeyReader || rawElementReader) {
       const uint64_t BUFFER_SIZE = 1024;
       int64_t buffer[BUFFER_SIZE];
@@ -1206,7 +1097,7 @@ namespace orc {
       while (lengthsRead < numValues) {
         uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE);
         rle->next(buffer, chunk, nullptr);
-        for(size_t i=0; i < chunk; ++i) {
+        for (size_t i = 0; i < chunk; ++i) {
           childrenElements += static_cast<size_t>(buffer[i]);
         }
         lengthsRead += chunk;
@@ -1223,32 +1114,26 @@ namespace orc {
     return numValues;
   }
 
-  void MapColumnReader::next(ColumnVectorBatch& rowBatch,
-                             uint64_t numValues,
-                             char *notNull)
-  {
+  void MapColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     nextInternal<false>(rowBatch, numValues, notNull);
   }
 
-  void MapColumnReader::nextEncoded(ColumnVectorBatch& rowBatch,
-                             uint64_t numValues,
-                             char *notNull)
-  {
+  void MapColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                    char* notNull) {
     nextInternal<true>(rowBatch, numValues, notNull);
   }
 
-  template<bool encoded>
-  void MapColumnReader::nextInternal(ColumnVectorBatch& rowBatch,
-                             uint64_t numValues,
-                             char *notNull) {
+  template <bool encoded>
+  void MapColumnReader::nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                     char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
-    MapVectorBatch &mapBatch = dynamic_cast<MapVectorBatch&>(rowBatch);
+    MapVectorBatch& mapBatch = dynamic_cast<MapVectorBatch&>(rowBatch);
     int64_t* offsets = mapBatch.offsets.data();
     notNull = mapBatch.hasNulls ? mapBatch.notNull.data() : nullptr;
     rle->next(offsets, numValues, notNull);
     uint64_t totalChildren = 0;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           uint64_t tmp = static_cast<uint64_t>(offsets[i]);
           offsets[i] = static_cast<int64_t>(totalChildren);
@@ -1258,14 +1143,14 @@ namespace orc {
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         uint64_t tmp = static_cast<uint64_t>(offsets[i]);
         offsets[i] = static_cast<int64_t>(totalChildren);
         totalChildren += tmp;
       }
     }
     offsets[numValues] = static_cast<int64_t>(totalChildren);
-    ColumnReader *rawKeyReader = keyReader.get();
+    ColumnReader* rawKeyReader = keyReader.get();
     if (rawKeyReader) {
       if (encoded) {
         rawKeyReader->nextEncoded(*(mapBatch.keys.get()), totalChildren, nullptr);
@@ -1273,7 +1158,7 @@ namespace orc {
         rawKeyReader->next(*(mapBatch.keys.get()), totalChildren, nullptr);
       }
     }
-    ColumnReader *rawElementReader = elementReader.get();
+    ColumnReader* rawElementReader = elementReader.get();
     if (rawElementReader) {
       if (encoded) {
         rawElementReader->nextEncoded(*(mapBatch.elements.get()), totalChildren, nullptr);
@@ -1283,8 +1168,7 @@ namespace orc {
     }
   }
 
-  void MapColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+  void MapColumnReader::seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     rle->seek(positions.at(columnId));
     if (keyReader.get()) {
@@ -1295,54 +1179,49 @@ namespace orc {
     }
   }
 
-  class UnionColumnReader: public ColumnReader {
-  private:
+  class UnionColumnReader : public ColumnReader {
+   private:
     std::unique_ptr<ByteRleDecoder> rle;
     std::vector<std::unique_ptr<ColumnReader>> childrenReader;
     std::vector<int64_t> childrenCounts;
     uint64_t numChildren;
 
-  public:
-    UnionColumnReader(const Type& type, StripeStreams& stipe);
+   public:
+    UnionColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false,
+                      bool throwOnSchemaEvolutionOverflow = false);
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void nextEncoded(ColumnVectorBatch& rowBatch,
-                     uint64_t numValues,
-                     char *notNull) override;
+    void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
 
-  private:
-    template<bool encoded>
-    void nextInternal(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char *notNull);
+   private:
+    template <bool encoded>
+    void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull);
   };
 
-  UnionColumnReader::UnionColumnReader(const Type& type,
-                                       StripeStreams& stripe
-                                       ): ColumnReader(type, stripe) {
+  UnionColumnReader::UnionColumnReader(const Type& type, StripeStreams& stripe,
+                                       bool useTightNumericVector,
+                                       bool throwOnSchemaEvolutionOverflow)
+      : ColumnReader(type, stripe) {
     numChildren = type.getSubtypeCount();
     childrenReader.resize(numChildren);
     childrenCounts.resize(numChildren);
 
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (stream == nullptr)
-      throw ParseError("LENGTH stream not found in Union column");
-    rle = createByteRleDecoder(std::move(stream));
+    if (stream == nullptr) throw ParseError("LENGTH stream not found in Union column");
+    rle = createByteRleDecoder(std::move(stream), metrics);
     // figure out which types are selected
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
-    for(unsigned int i=0; i < numChildren; ++i) {
-      const Type &child = *type.getSubtype(i);
+    for (unsigned int i = 0; i < numChildren; ++i) {
+      const Type& child = *type.getSubtype(i);
       if (selectedColumns[static_cast<size_t>(child.getColumnId())]) {
-        childrenReader[i] = buildReader(child, stripe);
+        childrenReader[i] =
+            buildReader(child, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow);
       }
     }
   }
@@ -1352,17 +1231,17 @@ namespace orc {
     const uint64_t BUFFER_SIZE = 1024;
     char buffer[BUFFER_SIZE];
     uint64_t lengthsRead = 0;
-    int64_t *counts = childrenCounts.data();
+    int64_t* counts = childrenCounts.data();
     memset(counts, 0, sizeof(int64_t) * numChildren);
     while (lengthsRead < numValues) {
       uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE);
       rle->next(buffer, chunk, nullptr);
-      for(size_t i=0; i < chunk; ++i) {
+      for (size_t i = 0; i < chunk; ++i) {
         counts[static_cast<size_t>(buffer[i])] += 1;
       }
       lengthsRead += chunk;
     }
-    for(size_t i=0; i < numChildren; ++i) {
+    for (size_t i = 0; i < numChildren; ++i) {
       if (counts[i] != 0 && childrenReader[i] != nullptr) {
         childrenReader[i]->skip(static_cast<uint64_t>(counts[i]));
       }
@@ -1370,63 +1249,57 @@ namespace orc {
     return numValues;
   }
 
-  void UnionColumnReader::next(ColumnVectorBatch& rowBatch,
-                              uint64_t numValues,
-                              char *notNull) {
+  void UnionColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     nextInternal<false>(rowBatch, numValues, notNull);
   }
 
-  void UnionColumnReader::nextEncoded(ColumnVectorBatch& rowBatch,
-                              uint64_t numValues,
-                              char *notNull) {
+  void UnionColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                      char* notNull) {
     nextInternal<true>(rowBatch, numValues, notNull);
   }
 
-  template<bool encoded>
-  void UnionColumnReader::nextInternal(ColumnVectorBatch& rowBatch,
-                               uint64_t numValues,
-                               char *notNull) {
+  template <bool encoded>
+  void UnionColumnReader::nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                       char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
-    UnionVectorBatch &unionBatch = dynamic_cast<UnionVectorBatch&>(rowBatch);
+    UnionVectorBatch& unionBatch = dynamic_cast<UnionVectorBatch&>(rowBatch);
     uint64_t* offsets = unionBatch.offsets.data();
     int64_t* counts = childrenCounts.data();
     memset(counts, 0, sizeof(int64_t) * numChildren);
     unsigned char* tags = unionBatch.tags.data();
     notNull = unionBatch.hasNulls ? unionBatch.notNull.data() : nullptr;
-    rle->next(reinterpret_cast<char *>(tags), numValues, notNull);
+    rle->next(reinterpret_cast<char*>(tags), numValues, notNull);
     // set the offsets for each row
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
-          offsets[i] =
-            static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++);
+          offsets[i] = static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++);
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
-        offsets[i] =
-          static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++);
+      for (size_t i = 0; i < numValues; ++i) {
+        offsets[i] = static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++);
       }
     }
     // read the right number of each child column
-    for(size_t i=0; i < numChildren; ++i) {
+    for (size_t i = 0; i < numChildren; ++i) {
       if (childrenReader[i] != nullptr) {
         if (encoded) {
           childrenReader[i]->nextEncoded(*(unionBatch.children[i]),
-                                  static_cast<uint64_t>(counts[i]), nullptr);
+                                         static_cast<uint64_t>(counts[i]), nullptr);
         } else {
-          childrenReader[i]->next(*(unionBatch.children[i]),
-                                  static_cast<uint64_t>(counts[i]), nullptr);
+          childrenReader[i]->next(*(unionBatch.children[i]), static_cast<uint64_t>(counts[i]),
+                                  nullptr);
         }
       }
     }
   }
 
   void UnionColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     rle->seek(positions.at(columnId));
-    for(size_t i = 0; i < numChildren; ++i) {
+    for (size_t i = 0; i < numChildren; ++i) {
       if (childrenReader[i] != nullptr) {
         childrenReader[i]->seekToRowGroup(positions);
       }
@@ -1446,13 +1319,13 @@ namespace orc {
     }
   }
 
-  class Decimal64ColumnReader: public ColumnReader {
-  public:
+  class Decimal64ColumnReader : public ColumnReader {
+   public:
     static const uint32_t MAX_PRECISION_64 = 18;
     static const uint32_t MAX_PRECISION_128 = 38;
     static const int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1];
 
-  protected:
+   protected:
     std::unique_ptr<SeekableInputStream> valueStream;
     int32_t precision;
     int32_t scale;
@@ -1467,9 +1340,8 @@ namespace orc {
     void readBuffer() {
       while (buffer == bufferEnd) {
         int length;
-        if (!valueStream->Next(reinterpret_cast<const void**>(&buffer),
-                               &length)) {
-          throw ParseError("Read past end of stream in Decimal64ColumnReader "+
+        if (!valueStream->Next(reinterpret_cast<const void**>(&buffer), &length)) {
+          throw ParseError("Read past end of stream in Decimal64ColumnReader " +
                            valueStream->getName());
         }
         bufferEnd = buffer + length;
@@ -1489,69 +1361,61 @@ namespace orc {
         }
       }
       value = unZigZag(static_cast<uint64_t>(value));
-      if (scale > currentScale &&
-          static_cast<uint64_t>(scale - currentScale) <= MAX_PRECISION_64) {
+      if (scale > currentScale && static_cast<uint64_t>(scale - currentScale) <= MAX_PRECISION_64) {
         value *= POWERS_OF_TEN[scale - currentScale];
       } else if (scale < currentScale &&
-          static_cast<uint64_t>(currentScale - scale) <= MAX_PRECISION_64) {
+                 static_cast<uint64_t>(currentScale - scale) <= MAX_PRECISION_64) {
         value /= POWERS_OF_TEN[currentScale - scale];
       } else if (scale != currentScale) {
         throw ParseError("Decimal scale out of range");
       }
     }
 
-  public:
+   public:
     Decimal64ColumnReader(const Type& type, StripeStreams& stipe);
     ~Decimal64ColumnReader() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-    void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions) override;
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
   };
   const uint32_t Decimal64ColumnReader::MAX_PRECISION_64;
   const uint32_t Decimal64ColumnReader::MAX_PRECISION_128;
-  const int64_t Decimal64ColumnReader::POWERS_OF_TEN[MAX_PRECISION_64 + 1]=
-    {1,
-     10,
-     100,
-     1000,
-     10000,
-     100000,
-     1000000,
-     10000000,
-     100000000,
-     1000000000,
-     10000000000,
-     100000000000,
-     1000000000000,
-     10000000000000,
-     100000000000000,
-     1000000000000000,
-     10000000000000000,
-     100000000000000000,
-     1000000000000000000};
-
-  Decimal64ColumnReader::Decimal64ColumnReader(const Type& type,
-                                               StripeStreams& stripe
-                                               ): ColumnReader(type, stripe) {
+  const int64_t Decimal64ColumnReader::POWERS_OF_TEN[MAX_PRECISION_64 + 1] = {1,
+                                                                              10,
+                                                                              100,
+                                                                              1000,
+                                                                              10000,
+                                                                              100000,
+                                                                              1000000,
+                                                                              10000000,
+                                                                              100000000,
+                                                                              1000000000,
+                                                                              10000000000,
+                                                                              100000000000,
+                                                                              1000000000000,
+                                                                              10000000000000,
+                                                                              100000000000000,
+                                                                              1000000000000000,
+                                                                              10000000000000000,
+                                                                              100000000000000000,
+                                                                              1000000000000000000};
+
+  Decimal64ColumnReader::Decimal64ColumnReader(const Type& type, StripeStreams& stripe)
+      : ColumnReader(type, stripe) {
     scale = static_cast<int32_t>(type.getScale());
     precision = static_cast<int32_t>(type.getPrecision());
     valueStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
-    if (valueStream == nullptr)
-      throw ParseError("DATA stream not found in Decimal64Column");
+    if (valueStream == nullptr) throw ParseError("DATA stream not found in Decimal64Column");
     buffer = nullptr;
     bufferEnd = nullptr;
     RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true);
-    if (stream == nullptr)
-      throw ParseError("SECONDARY stream not found in Decimal64Column");
-    scaleDecoder = createRleDecoder(std::move(stream), true, vers, memoryPool);
+    if (stream == nullptr) throw ParseError("SECONDARY stream not found in Decimal64Column");
+    scaleDecoder = createRleDecoder(std::move(stream), true, vers, memoryPool, metrics);
   }
 
   Decimal64ColumnReader::~Decimal64ColumnReader() {
@@ -1571,13 +1435,10 @@ namespace orc {
     return numValues;
   }
 
-  void Decimal64ColumnReader::next(ColumnVectorBatch& rowBatch,
-                                   uint64_t numValues,
-                                   char *notNull) {
+  void Decimal64ColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    Decimal64VectorBatch &batch =
-      dynamic_cast<Decimal64VectorBatch&>(rowBatch);
+    Decimal64VectorBatch& batch = dynamic_cast<Decimal64VectorBatch&>(rowBatch);
     int64_t* values = batch.values.data();
     // read the next group of scales
     int64_t* scaleBuffer = batch.readScales.data();
@@ -1585,13 +1446,13 @@ namespace orc {
     batch.precision = precision;
     batch.scale = scale;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           readInt64(values[i], static_cast<int32_t>(scaleBuffer[i]));
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         readInt64(values[i], static_cast<int32_t>(scaleBuffer[i]));
       }
     }
@@ -1599,28 +1460,25 @@ namespace orc {
 
   void scaleInt128(Int128& value, uint32_t scale, uint32_t currentScale) {
     if (scale > currentScale) {
-      while(scale > currentScale) {
+      while (scale > currentScale) {
         uint32_t scaleAdjust =
-          std::min(Decimal64ColumnReader::MAX_PRECISION_64,
-                   scale - currentScale);
+            std::min(Decimal64ColumnReader::MAX_PRECISION_64, scale - currentScale);
         value *= Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust];
         currentScale += scaleAdjust;
       }
     } else if (scale < currentScale) {
       Int128 remainder;
-      while(currentScale > scale) {
+      while (currentScale > scale) {
         uint32_t scaleAdjust =
-          std::min(Decimal64ColumnReader::MAX_PRECISION_64,
-                   currentScale - scale);
-        value = value.divide(Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust],
-                             remainder);
+            std::min(Decimal64ColumnReader::MAX_PRECISION_64, currentScale - scale);
+        value = value.divide(Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust], remainder);
         currentScale -= scaleAdjust;
       }
     }
   }
 
   void Decimal64ColumnReader::seekToRowGroup(
-    std::unordered_map<uint64_t, PositionProvider>& positions) {
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
     ColumnReader::seekToRowGroup(positions);
     valueStream->seek(positions.at(columnId));
     scaleDecoder->seek(positions.at(columnId));
@@ -1629,16 +1487,14 @@ namespace orc {
     bufferEnd = nullptr;
   }
 
-  class Decimal128ColumnReader: public Decimal64ColumnReader {
-  public:
+  class Decimal128ColumnReader : public Decimal64ColumnReader {
+   public:
     Decimal128ColumnReader(const Type& type, StripeStreams& stipe);
     ~Decimal128ColumnReader() override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
 
-  private:
+   private:
     void readInt128(Int128& value, int32_t currentScale) {
       value = 0;
       Int128 work;
@@ -1648,22 +1504,19 @@ namespace orc {
         unsigned char ch = static_cast<unsigned char>(*(buffer++));
         work = ch & 0x7f;
         work <<= offset;
-        value |=  work;
+        value |= work;
         offset += 7;
         if (!(ch & 0x80)) {
           break;
         }
       }
       unZigZagInt128(value);
-      scaleInt128(value, static_cast<uint32_t>(scale),
-                  static_cast<uint32_t>(currentScale));
+      scaleInt128(value, static_cast<uint32_t>(scale), static_cast<uint32_t>(currentScale));
     }
   };
 
-  Decimal128ColumnReader::Decimal128ColumnReader
-                (const Type& type,
-                 StripeStreams& stripe
-                 ): Decimal64ColumnReader(type, stripe) {
+  Decimal128ColumnReader::Decimal128ColumnReader(const Type& type, StripeStreams& stripe)
+      : Decimal64ColumnReader(type, stripe) {
     // PASS
   }
 
@@ -1671,13 +1524,11 @@ namespace orc {
     // PASS
   }
 
-  void Decimal128ColumnReader::next(ColumnVectorBatch& rowBatch,
-                                   uint64_t numValues,
-                                   char *notNull) {
+  void Decimal128ColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                    char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    Decimal128VectorBatch &batch =
-      dynamic_cast<Decimal128VectorBatch&>(rowBatch);
+    Decimal128VectorBatch& batch = dynamic_cast<Decimal128VectorBatch&>(rowBatch);
     Int128* values = batch.values.data();
     // read the next group of scales
     int64_t* scaleBuffer = batch.readScales.data();
@@ -1685,38 +1536,35 @@ namespace orc {
     batch.precision = precision;
     batch.scale = scale;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
           readInt128(values[i], static_cast<int32_t>(scaleBuffer[i]));
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         readInt128(values[i], static_cast<int32_t>(scaleBuffer[i]));
       }
     }
   }
 
-  class Decimal64ColumnReaderV2: public ColumnReader {
-  protected:
+  class Decimal64ColumnReaderV2 : public ColumnReader {
+   protected:
     std::unique_ptr<RleDecoder> valueDecoder;
     int32_t precision;
     int32_t scale;
 
-  public:
+   public:
     Decimal64ColumnReaderV2(const Type& type, StripeStreams& stripe);
     ~Decimal64ColumnReaderV2() override;
 
     uint64_t skip(uint64_t numValues) override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
   };
 
-  Decimal64ColumnReaderV2::Decimal64ColumnReaderV2(const Type& type,
-                                                   StripeStreams& stripe
-                                                   ): ColumnReader(type, stripe) {
+  Decimal64ColumnReaderV2::Decimal64ColumnReaderV2(const Type& type, StripeStreams& stripe)
+      : ColumnReader(type, stripe) {
     scale = static_cast<int32_t>(type.getScale());
     precision = static_cast<int32_t>(type.getPrecision());
     std::unique_ptr<SeekableInputStream> stream =
@@ -1726,7 +1574,7 @@ namespace orc {
       ss << "DATA stream not found in Decimal64V2 column. ColumnId=" << columnId;
       throw ParseError(ss.str());
     }
-    valueDecoder = createRleDecoder(std::move(stream), true, RleVersion_2, memoryPool);
+    valueDecoder = createRleDecoder(std::move(stream), true, RleVersion_2, memoryPool, metrics);
   }
 
   Decimal64ColumnReaderV2::~Decimal64ColumnReaderV2() {
@@ -1739,20 +1587,18 @@ namespace orc {
     return numValues;
   }
 
-  void Decimal64ColumnReaderV2::next(ColumnVectorBatch& rowBatch,
-                                     uint64_t numValues,
-                                     char *notNull) {
+  void Decimal64ColumnReaderV2::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                     char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    Decimal64VectorBatch &batch =
-      dynamic_cast<Decimal64VectorBatch&>(rowBatch);
+    Decimal64VectorBatch& batch = dynamic_cast<Decimal64VectorBatch&>(rowBatch);
     valueDecoder->next(batch.values.data(), numValues, notNull);
     batch.precision = precision;
     batch.scale = scale;
   }
 
-  class DecimalHive11ColumnReader: public Decimal64ColumnReader {
-  private:
+  class DecimalHive11ColumnReader : public Decimal64ColumnReader {
+   private:
     bool throwOnOverflow;
     std::ostream* errorStream;
 
@@ -1762,7 +1608,7 @@ namespace orc {
     bool readInt128(Int128& value, int32_t currentScale) {
       // -/+ 99999999999999999999999999999999999999
       static const Int128 MIN_VALUE(-0x4b3b4ca85a86c47b, 0xf675ddc000000001);
-      static const Int128 MAX_VALUE( 0x4b3b4ca85a86c47a, 0x098a223fffffffff);
+      static const Int128 MAX_VALUE(0x4b3b4ca85a86c47a, 0x098a223fffffffff);
 
       value = 0;
       Int128 work;
@@ -1778,7 +1624,7 @@ namespace orc {
           result = false;
         }
         work <<= offset;
-        value |=  work;
+        value |= work;
         offset += 7;
         if (!(ch & 0x80)) {
           break;
@@ -1789,24 +1635,19 @@ namespace orc {
         return result;
       }
       unZigZagInt128(value);
-      scaleInt128(value, static_cast<uint32_t>(scale),
-                  static_cast<uint32_t>(currentScale));
+      scaleInt128(value, static_cast<uint32_t>(scale), static_cast<uint32_t>(currentScale));
       return value >= MIN_VALUE && value <= MAX_VALUE;
     }
 
-  public:
+   public:
     DecimalHive11ColumnReader(const Type& type, StripeStreams& stipe);
     ~DecimalHive11ColumnReader() override;
 
-    void next(ColumnVectorBatch& rowBatch,
-              uint64_t numValues,
-              char *notNull) override;
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
   };
 
-  DecimalHive11ColumnReader::DecimalHive11ColumnReader
-                    (const Type& type,
-                     StripeStreams& stripe
-                     ): Decimal64ColumnReader(type, stripe) {
+  DecimalHive11ColumnReader::DecimalHive11ColumnReader(const Type& type, StripeStreams& stripe)
+      : Decimal64ColumnReader(type, stripe) {
     scale = stripe.getForcedScaleOnHive11Decimal();
     throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow();
     errorStream = stripe.getErrorStream();
@@ -1816,13 +1657,11 @@ namespace orc {
     // PASS
   }
 
-  void DecimalHive11ColumnReader::next(ColumnVectorBatch& rowBatch,
-                                       uint64_t numValues,
-                                       char *notNull) {
+  void DecimalHive11ColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                       char* notNull) {
     ColumnReader::next(rowBatch, numValues, notNull);
     notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr;
-    Decimal128VectorBatch &batch =
-      dynamic_cast<Decimal128VectorBatch&>(rowBatch);
+    Decimal128VectorBatch& batch = dynamic_cast<Decimal128VectorBatch&>(rowBatch);
     Int128* values = batch.values.data();
     // read the next group of scales
     int64_t* scaleBuffer = batch.readScales.data();
@@ -1832,10 +1671,9 @@ namespace orc {
     batch.precision = precision;
     batch.scale = scale;
     if (notNull) {
-      for(size_t i=0; i < numValues; ++i) {
+      for (size_t i = 0; i < numValues; ++i) {
         if (notNull[i]) {
-          if (!readInt128(values[i],
-                          static_cast<int32_t>(scaleBuffer[i]))) {
+          if (!readInt128(values[i], static_cast<int32_t>(scaleBuffer[i]))) {
             if (throwOnOverflow) {
               throw ParseError("Hive 0.11 decimal was more than 38 digits.");
             } else {
@@ -1848,9 +1686,8 @@ namespace orc {
         }
       }
     } else {
-      for(size_t i=0; i < numValues; ++i) {
-        if (!readInt128(values[i],
-                        static_cast<int32_t>(scaleBuffer[i]))) {
+      for (size_t i = 0; i < numValues; ++i) {
+        if (!readInt128(values[i], static_cast<int32_t>(scaleBuffer[i]))) {
           if (throwOnOverflow) {
             throw ParseError("Hive 0.11 decimal was more than 38 digits.");
           } else {
@@ -1866,109 +1703,133 @@ namespace orc {
   }
 
   static bool isLittleEndian() {
-    static union { uint32_t i; char c[4]; } num = { 0x01020304 };
+    static union {
+      uint32_t i;
+      char c[4];
+    } num = {0x01020304};
     return num.c[0] == 4;
   }
 
   /**
    * Create a reader for the given stripe.
    */
-  std::unique_ptr<ColumnReader> buildReader(const Type& type,
-                                            StripeStreams& stripe) {
-    switch (static_cast<int64_t>(type.getKind())) {
-    case DATE:
-    case INT:
-    case LONG:
-    case SHORT:
-      return std::unique_ptr<ColumnReader>(
-          new IntegerColumnReader(type, stripe));
-    case BINARY:
-    case CHAR:
-    case STRING:
-    case VARCHAR:
-      switch (static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())){
-      case proto::ColumnEncoding_Kind_DICTIONARY:
-      case proto::ColumnEncoding_Kind_DICTIONARY_V2:
-        return std::unique_ptr<ColumnReader>(
-            new StringDictionaryColumnReader(type, stripe));
-      case proto::ColumnEncoding_Kind_DIRECT:
-      case proto::ColumnEncoding_Kind_DIRECT_V2:
-        return std::unique_ptr<ColumnReader>(
-            new StringDirectColumnReader(type, stripe));
-      default:
-        throw NotImplementedYet("buildReader unhandled string encoding");
-      }
+  std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& stripe,
+                                            bool useTightNumericVector,
+                                            bool throwOnSchemaEvolutionOverflow,
+                                            bool convertToReadType) {
+    if (convertToReadType && stripe.getSchemaEvolution() &&
+        stripe.getSchemaEvolution()->needConvert(type)) {
+      return buildConvertReader(type, stripe, useTightNumericVector,
+                                throwOnSchemaEvolutionOverflow);
+    }
 
-    case BOOLEAN:
-      return std::unique_ptr<ColumnReader>(
-          new BooleanColumnReader(type, stripe));
+    switch (static_cast<int64_t>(type.getKind())) {
+      case SHORT:
+        if (useTightNumericVector) {
+          return std::make_unique<IntegerColumnReader<ShortVectorBatch>>(type, stripe);
+        }
+        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
+      case INT:
+        if (useTightNumericVector) {
+          return std::make_unique<IntegerColumnReader<IntVectorBatch>>(type, stripe);
+        }
+        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
+      case LONG:
+      case DATE:
+        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
+      case BINARY:
+      case CHAR:
+      case STRING:
+      case VARCHAR:
+        switch (static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())) {
+          case proto::ColumnEncoding_Kind_DICTIONARY:
+          case proto::ColumnEncoding_Kind_DICTIONARY_V2:
+            return std::make_unique<StringDictionaryColumnReader>(type, stripe);
+          case proto::ColumnEncoding_Kind_DIRECT:
+          case proto::ColumnEncoding_Kind_DIRECT_V2:
+            return std::make_unique<StringDirectColumnReader>(type, stripe);
+          default:
+            throw NotImplementedYet("buildReader unhandled string encoding");
+        }
 
-    case BYTE:
-      return std::unique_ptr<ColumnReader>(
-          new ByteColumnReader(type, stripe));
+      case BOOLEAN: {
+        if (useTightNumericVector) {
+          return std::make_unique<BooleanColumnReader<ByteVectorBatch>>(type, stripe);
+        } else {
+          return std::make_unique<BooleanColumnReader<LongVectorBatch>>(type, stripe);
+        }
+      }
 
-    case LIST:
-      return std::unique_ptr<ColumnReader>(
-          new ListColumnReader(type, stripe));
+      case BYTE:
+        if (useTightNumericVector) {
+          return std::make_unique<ByteColumnReader<ByteVectorBatch>>(type, stripe);
+        }
+        return std::make_unique<ByteColumnReader<LongVectorBatch>>(type, stripe);
 
-    case MAP:
-      return std::unique_ptr<ColumnReader>(
-          new MapColumnReader(type, stripe));
+      case LIST:
+        return std::make_unique<ListColumnReader>(type, stripe, useTightNumericVector,
+                                                  throwOnSchemaEvolutionOverflow);
 
-    case UNION:
-      return std::unique_ptr<ColumnReader>(
-          new UnionColumnReader(type, stripe));
+      case MAP:
+        return std::make_unique<MapColumnReader>(type, stripe, useTightNumericVector,
+                                                 throwOnSchemaEvolutionOverflow);
 
-    case STRUCT:
-      return std::unique_ptr<ColumnReader>(
-          new StructColumnReader(type, stripe));
+      case UNION:
+        return std::make_unique<UnionColumnReader>(type, stripe, useTightNumericVector,
+                                                   throwOnSchemaEvolutionOverflow);
 
-    case FLOAT:
-      if (isLittleEndian()) {
-        return std::unique_ptr<ColumnReader>(
-            new DoubleColumnReader<FLOAT, true>(type, stripe));
-      }
-      return std::unique_ptr<ColumnReader>(
-          new DoubleColumnReader<FLOAT, false>(type, stripe));
+      case STRUCT:
+        return std::make_unique<StructColumnReader>(type, stripe, useTightNumericVector,
+                                                    throwOnSchemaEvolutionOverflow);
 
-    case DOUBLE:
-      if (isLittleEndian()) {
-        return std::unique_ptr<ColumnReader>(
-            new DoubleColumnReader<DOUBLE, true>(type, stripe));
-      }
-      return std::unique_ptr<ColumnReader>(
-          new DoubleColumnReader<DOUBLE, false>(type, stripe));
-
-    case TIMESTAMP:
-      return std::unique_ptr<ColumnReader>
-        (new TimestampColumnReader(type, stripe, false));
-
-    case TIMESTAMP_INSTANT:
-      return std::unique_ptr<ColumnReader>
-        (new TimestampColumnReader(type, stripe, true));
-
-    case DECIMAL:
-      // is this a Hive 0.11 or 0.12 file?
-      if (type.getPrecision() == 0) {
-        return std::unique_ptr<ColumnReader>
-          (new DecimalHive11ColumnReader(type, stripe));
+      case FLOAT: {
+        if (useTightNumericVector) {
+          if (isLittleEndian()) {
+            return std::make_unique<DoubleColumnReader<FLOAT, true, float, FloatVectorBatch>>(
+                type, stripe);
+          }
+          return std::make_unique<DoubleColumnReader<FLOAT, false, float, FloatVectorBatch>>(
+              type, stripe);
+        }
+        if (isLittleEndian()) {
+          return std::make_unique<DoubleColumnReader<FLOAT, true, double, DoubleVectorBatch>>(
+              type, stripe);
+        }
+        return std::make_unique<DoubleColumnReader<FLOAT, false, double, DoubleVectorBatch>>(
+            type, stripe);
       }
-      // can we represent the values using int64_t?
-      if (type.getPrecision() <= Decimal64ColumnReader::MAX_PRECISION_64) {
-        if (stripe.isDecimalAsLong()) {
-          return std::unique_ptr<ColumnReader>
-            (new Decimal64ColumnReaderV2(type, stripe));
+      case DOUBLE: {
+        if (isLittleEndian()) {
+          return std::make_unique<DoubleColumnReader<DOUBLE, true, double, DoubleVectorBatch>>(
+              type, stripe);
         }
-        return std::unique_ptr<ColumnReader>
-          (new Decimal64ColumnReader(type, stripe));
+        return std::make_unique<DoubleColumnReader<DOUBLE, false, double, DoubleVectorBatch>>(
+            type, stripe);
       }
-      // otherwise we use the Int128 implementation
-      return std::unique_ptr<ColumnReader>
-        (new Decimal128ColumnReader(type, stripe));
+      case TIMESTAMP:
+        return std::make_unique<TimestampColumnReader>(type, stripe, false);
+
+      case TIMESTAMP_INSTANT:
+        return std::make_unique<TimestampColumnReader>(type, stripe, true);
 
-    default:
-      throw NotImplementedYet("buildReader unhandled type");
+      case DECIMAL:
+        // is this a Hive 0.11 or 0.12 file?
+        if (type.getPrecision() == 0) {
+          return std::make_unique<DecimalHive11ColumnReader>(type, stripe);
+        }
+        // can we represent the values using int64_t?
+        if (type.getPrecision() <= Decimal64ColumnReader::MAX_PRECISION_64) {
+          if (stripe.isDecimalAsLong()) {
+            return std::make_unique<Decimal64ColumnReaderV2>(type, stripe);
+          }
+          return std::make_unique<Decimal64ColumnReader>(type, stripe);
+        }
+        // otherwise we use the Int128 implementation
+        return std::make_unique<Decimal128ColumnReader>(type, stripe);
+
+      default:
+        throw NotImplementedYet("buildReader unhandled type");
     }
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/ColumnReader.hh b/contrib/libs/apache/orc/c++/src/ColumnReader.hh
index 80b59de2c1..f0f3fe1b52 100644
--- a/contrib/libs/apache/orc/c++/src/ColumnReader.hh
+++ b/contrib/libs/apache/orc/c++/src/ColumnReader.hh
@@ -30,8 +30,10 @@
 
 namespace orc {
 
+  class SchemaEvolution;
+
   class StripeStreams {
-  public:
+   public:
     virtual ~StripeStreams();
 
     /**
@@ -53,10 +55,9 @@ namespace orc {
      * @param shouldStream should the reading page the stream in
      * @return the new stream
      */
-    virtual std::unique_ptr<SeekableInputStream>
-                    getStream(uint64_t columnId,
-                              proto::Stream_Kind kind,
-                              bool shouldStream) const = 0;
+    virtual std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
+                                                           proto::Stream_Kind kind,
+                                                           bool shouldStream) const = 0;
 
     /**
      * Get the memory pool for this reader.
@@ -64,6 +65,11 @@ namespace orc {
     virtual MemoryPool& getMemoryPool() const = 0;
 
     /**
+     * Get the reader metrics for this reader.
+     */
+    virtual ReaderMetrics* getReaderMetrics() const = 0;
+
+    /**
      * Get the writer's timezone, so that we can convert their dates correctly.
      */
     virtual const Timezone& getWriterTimezone() const = 0;
@@ -97,18 +103,24 @@ namespace orc {
      * encoded in RLE.
      */
     virtual bool isDecimalAsLong() const = 0;
+
+    /**
+     * @return get schema evolution utility object
+     */
+    virtual const SchemaEvolution* getSchemaEvolution() const = 0;
   };
 
   /**
    * The interface for reading ORC data types.
    */
   class ColumnReader {
-  protected:
+   protected:
     std::unique_ptr<ByteRleDecoder> notNullDecoder;
     uint64_t columnId;
     MemoryPool& memoryPool;
+    ReaderMetrics* metrics;
 
-  public:
+   public:
     ColumnReader(const Type& type, StripeStreams& stipe);
 
     virtual ~ColumnReader();
@@ -128,9 +140,7 @@ namespace orc {
      *           a mask (with at least numValues bytes) for which values to
      *           set.
      */
-    virtual void next(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char* notNull);
+    virtual void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull);
 
     /**
      * Read the next group of values without decoding
@@ -140,10 +150,7 @@ namespace orc {
      *           a mask (with at least numValues bytes) for which values to
      *           set.
      */
-    virtual void nextEncoded(ColumnVectorBatch& rowBatch,
-                      uint64_t numValues,
-                      char* notNull)
-    {
+    virtual void nextEncoded(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
       rowBatch.isEncoded = false;
       next(rowBatch, numValues, notNull);
     }
@@ -152,16 +159,16 @@ namespace orc {
      * Seek to beginning of a row group in the current stripe
      * @param positions a list of PositionProviders storing the positions
      */
-    virtual void seekToRowGroup(
-      std::unordered_map<uint64_t, PositionProvider>& positions);
-
+    virtual void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions);
   };
 
   /**
    * Create a reader for the given stripe.
    */
-  std::unique_ptr<ColumnReader> buildReader(const Type& type,
-                                            StripeStreams& stripe);
-}
+  std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& stripe,
+                                            bool useTightNumericVector = false,
+                                            bool throwOnSchemaEvolutionOverflow = false,
+                                            bool convertToReadType = true);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/ColumnWriter.cc b/contrib/libs/apache/orc/c++/src/ColumnWriter.cc
index 32b68af349..f24be1f0b2 100644
--- a/contrib/libs/apache/orc/c++/src/ColumnWriter.cc
+++ b/contrib/libs/apache/orc/c++/src/ColumnWriter.cc
@@ -27,55 +27,43 @@
 
 namespace orc {
   StreamsFactory::~StreamsFactory() {
-    //PASS
+    // PASS
   }
 
   class StreamsFactoryImpl : public StreamsFactory {
-  public:
-    StreamsFactoryImpl(
-                       const WriterOptions& writerOptions,
-                       OutputStream* outputStream) :
-                       options(writerOptions),
-                       outStream(outputStream) {
-                       }
-
-    virtual std::unique_ptr<BufferedOutputStream>
-                    createStream(proto::Stream_Kind kind) const override;
-  private:
+   public:
+    StreamsFactoryImpl(const WriterOptions& writerOptions, OutputStream* outputStream)
+        : options(writerOptions), outStream(outputStream) {}
+
+    virtual std::unique_ptr<BufferedOutputStream> createStream(
+        proto::Stream_Kind kind) const override;
+
+   private:
     const WriterOptions& options;
     OutputStream* outStream;
   };
 
-  std::unique_ptr<BufferedOutputStream> StreamsFactoryImpl::createStream(
-                                                    proto::Stream_Kind) const {
+  std::unique_ptr<BufferedOutputStream> StreamsFactoryImpl::createStream(proto::Stream_Kind) const {
     // In the future, we can decide compression strategy and modifier
     // based on stream kind. But for now we just use the setting from
     // WriterOption
-    return createCompressor(
-                            options.getCompression(),
-                            outStream,
-                            options.getCompressionStrategy(),
+    return createCompressor(options.getCompression(), outStream, options.getCompressionStrategy(),
                             // BufferedOutputStream initial capacity
-                            1 * 1024 * 1024,
-                            options.getCompressionBlockSize(),
-                            *options.getMemoryPool());
+                            options.getOutputBufferCapacity(), options.getCompressionBlockSize(),
+                            *options.getMemoryPool(), options.getWriterMetrics());
   }
 
-  std::unique_ptr<StreamsFactory> createStreamsFactory(
-                                        const WriterOptions& options,
-                                        OutputStream* outStream) {
-    return std::unique_ptr<StreamsFactory>(
-                                   new StreamsFactoryImpl(options, outStream));
+  std::unique_ptr<StreamsFactory> createStreamsFactory(const WriterOptions& options,
+                                                       OutputStream* outStream) {
+    return std::make_unique<StreamsFactoryImpl>(options, outStream);
   }
 
   RowIndexPositionRecorder::~RowIndexPositionRecorder() {
     // PASS
   }
 
-  proto::ColumnEncoding_Kind RleVersionMapper(RleVersion rleVersion)
-  {
-    switch (rleVersion)
-    {
+  proto::ColumnEncoding_Kind RleVersionMapper(RleVersion rleVersion) {
+    switch (rleVersion) {
       case RleVersion_1:
         return proto::ColumnEncoding_Kind_DIRECT;
       case RleVersion_2:
@@ -85,24 +73,21 @@ namespace orc {
     }
   }
 
-  ColumnWriter::ColumnWriter(
-                             const Type& type,
-                             const StreamsFactory& factory,
-                             const WriterOptions& options) :
-                                columnId(type.getColumnId()),
-                                colIndexStatistics(),
-                                colStripeStatistics(),
-                                colFileStatistics(),
-                                enableIndex(options.getEnableIndex()),
-                                rowIndex(),
-                                rowIndexEntry(),
-                                rowIndexPosition(),
-                                enableBloomFilter(false),
-                                memPool(*options.getMemoryPool()),
-                                indexStream(),
-                                bloomFilterStream(),
-                                hasNullValue(false) {
-
+  ColumnWriter::ColumnWriter(const Type& type, const StreamsFactory& factory,
+                             const WriterOptions& options)
+      : columnId(type.getColumnId()),
+        colIndexStatistics(),
+        colStripeStatistics(),
+        colFileStatistics(),
+        enableIndex(options.getEnableIndex()),
+        rowIndex(),
+        rowIndexEntry(),
+        rowIndexPosition(),
+        enableBloomFilter(false),
+        memPool(*options.getMemoryPool()),
+        indexStream(),
+        bloomFilterStream(),
+        hasNullValue(false) {
     std::unique_ptr<BufferedOutputStream> presentStream =
         factory.createStream(proto::Stream_Kind_PRESENT);
     notNullEncoder = createBooleanRleEncoder(std::move(presentStream));
@@ -112,20 +97,17 @@ namespace orc {
     colFileStatistics = createColumnStatistics(type);
 
     if (enableIndex) {
-      rowIndex = std::unique_ptr<proto::RowIndex>(new proto::RowIndex());
-      rowIndexEntry =
-        std::unique_ptr<proto::RowIndexEntry>(new proto::RowIndexEntry());
-      rowIndexPosition = std::unique_ptr<RowIndexPositionRecorder>(
-                     new RowIndexPositionRecorder(*rowIndexEntry));
-      indexStream =
-        factory.createStream(proto::Stream_Kind_ROW_INDEX);
+      rowIndex = std::make_unique<proto::RowIndex>();
+      rowIndexEntry = std::make_unique<proto::RowIndexEntry>();
+      rowIndexPosition = std::make_unique<RowIndexPositionRecorder>(*rowIndexEntry);
+      indexStream = factory.createStream(proto::Stream_Kind_ROW_INDEX);
 
       // BloomFilters for non-UTF8 strings and non-UTC timestamps are not supported
-      if (options.isColumnUseBloomFilter(columnId)
-          && options.getBloomFilterVersion() == BloomFilterVersion::UTF8) {
+      if (options.isColumnUseBloomFilter(columnId) &&
+          options.getBloomFilterVersion() == BloomFilterVersion::UTF8) {
         enableBloomFilter = true;
-        bloomFilter.reset(new BloomFilterImpl(
-          options.getRowIndexStride(), options.getBloomFilterFPP()));
+        bloomFilter.reset(
+            new BloomFilterImpl(options.getRowIndexStride(), options.getBloomFilterFPP()));
         bloomFilterIndex.reset(new proto::BloomFilterIndex());
         bloomFilterStream = factory.createStream(proto::Stream_Kind_BLOOM_FILTER_UTF8);
       }
@@ -136,9 +118,7 @@ namespace orc {
     // PASS
   }
 
-  void ColumnWriter::add(ColumnVectorBatch& batch,
-                         uint64_t offset,
-                         uint64_t numValues,
+  void ColumnWriter::add(ColumnVectorBatch& batch, uint64_t offset, uint64_t numValues,
                          const char* incomingMask) {
     const char* notNull = batch.notNull.data() + offset;
     notNullEncoder->add(notNull, numValues, incomingMask);
@@ -167,8 +147,7 @@ namespace orc {
     return notNullEncoder->getBufferSize();
   }
 
-  void ColumnWriter::getStripeStatistics(
-    std::vector<proto::ColumnStatistics>& stats) const {
+  void ColumnWriter::getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     getProtoBufStatistics(stats, colStripeStatistics.get());
   }
 
@@ -182,13 +161,12 @@ namespace orc {
     colIndexStatistics->reset();
   }
 
-  void ColumnWriter::getFileStatistics(
-    std::vector<proto::ColumnStatistics>& stats) const {
+  void ColumnWriter::getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     getProtoBufStatistics(stats, colFileStatistics.get());
   }
 
   void ColumnWriter::createRowIndexEntry() {
-    proto::ColumnStatistics *indexStats = rowIndexEntry->mutable_statistics();
+    proto::ColumnStatistics* indexStats = rowIndexEntry->mutable_statistics();
     colIndexStatistics->toProtoBuf(*indexStats);
 
     *rowIndex->add_entry() = *rowIndexEntry;
@@ -206,12 +184,12 @@ namespace orc {
 
   void ColumnWriter::addBloomFilterEntry() {
     if (enableBloomFilter) {
-      BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter());
+      BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloom_filter());
       bloomFilter->reset();
     }
   }
 
-  void ColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const {
+  void ColumnWriter::writeIndex(std::vector<proto::Stream>& streams) const {
     if (!hasNullValue) {
       // remove positions of present stream
       int presentCount = indexStream->isCompressed() ? 4 : 3;
@@ -266,7 +244,7 @@ namespace orc {
 
     if (enableBloomFilter) {
       bloomFilter->reset();
-      bloomFilterIndex->clear_bloomfilter();
+      bloomFilterIndex->clear_bloom_filter();
     }
   }
 
@@ -275,28 +253,21 @@ namespace orc {
   }
 
   class StructColumnWriter : public ColumnWriter {
-  public:
-    StructColumnWriter(
-                       const Type& type,
-                       const StreamsFactory& factory,
+   public:
+    StructColumnWriter(const Type& type, const StreamsFactory& factory,
                        const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
-    virtual void getColumnEncoding(
-      std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
-    virtual void getStripeStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
-    virtual void getFileStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
     virtual void mergeStripeStatsIntoFileStats() override;
 
@@ -304,23 +275,20 @@ namespace orc {
 
     virtual void createRowIndexEntry() override;
 
-    virtual void writeIndex(
-      std::vector<proto::Stream> &streams) const override;
+    virtual void writeIndex(std::vector<proto::Stream>& streams) const override;
 
     virtual void writeDictionary() override;
 
     virtual void reset() override;
 
-  private:
+   private:
     std::vector<std::unique_ptr<ColumnWriter>> children;
   };
 
-  StructColumnWriter::StructColumnWriter(
-                                         const Type& type,
-                                         const StreamsFactory& factory,
-                                         const WriterOptions& options) :
-                                         ColumnWriter(type, factory, options) {
-    for(unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
+  StructColumnWriter::StructColumnWriter(const Type& type, const StreamsFactory& factory,
+                                         const WriterOptions& options)
+      : ColumnWriter(type, factory, options) {
+    for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
       const Type& child = *type.getSubtype(i);
       children.push_back(buildWriter(child, factory, options));
     }
@@ -330,20 +298,15 @@ namespace orc {
     }
   }
 
-  void StructColumnWriter::add(
-                               ColumnVectorBatch& rowBatch,
-                               uint64_t offset,
-                               uint64_t numValues,
+  void StructColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                const char* incomingMask) {
-    const StructVectorBatch* structBatch =
-      dynamic_cast<const StructVectorBatch *>(&rowBatch);
+    const StructVectorBatch* structBatch = dynamic_cast<const StructVectorBatch*>(&rowBatch);
     if (structBatch == nullptr) {
       throw InvalidArgument("Failed to cast to StructVectorBatch");
     }
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
-    const char* notNull = structBatch->hasNulls ?
-                          structBatch->notNull.data() + offset : nullptr;
+    const char* notNull = structBatch->hasNulls ? structBatch->notNull.data() + offset : nullptr;
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->add(*structBatch->fields[i], offset, numValues, notNull);
     }
@@ -372,8 +335,7 @@ namespace orc {
     }
   }
 
-  void StructColumnWriter::writeIndex(
-                      std::vector<proto::Stream> &streams) const {
+  void StructColumnWriter::writeIndex(std::vector<proto::Stream>& streams) const {
     ColumnWriter::writeIndex(streams);
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->writeIndex(streams);
@@ -388,19 +350,17 @@ namespace orc {
     return size;
   }
 
-  void StructColumnWriter::getColumnEncoding(
-                      std::vector<proto::ColumnEncoding>& encodings) const {
+  void StructColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     encodings.push_back(encoding);
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->getColumnEncoding(encodings);
     }
   }
 
-  void StructColumnWriter::getStripeStatistics(
-    std::vector<proto::ColumnStatistics>& stats) const {
+  void StructColumnWriter::getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getStripeStatistics(stats);
 
     for (uint32_t i = 0; i < children.size(); ++i) {
@@ -416,8 +376,7 @@ namespace orc {
     }
   }
 
-  void StructColumnWriter::getFileStatistics(
-    std::vector<proto::ColumnStatistics>& stats) const {
+  void StructColumnWriter::getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getFileStatistics(stats);
 
     for (uint32_t i = 0; i < children.size(); ++i) {
@@ -425,7 +384,7 @@ namespace orc {
     }
   }
 
-  void StructColumnWriter::mergeRowGroupStatsIntoStripeStats()  {
+  void StructColumnWriter::mergeRowGroupStatsIntoStripeStats() {
     ColumnWriter::mergeRowGroupStatsIntoStripeStats();
 
     for (uint32_t i = 0; i < children.size(); ++i) {
@@ -455,47 +414,38 @@ namespace orc {
     }
   }
 
+  template <typename BatchType>
   class IntegerColumnWriter : public ColumnWriter {
-  public:
-    IntegerColumnWriter(
-                        const Type& type,
-                        const StreamsFactory& factory,
+   public:
+    IntegerColumnWriter(const Type& type, const StreamsFactory& factory,
                         const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-              std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  protected:
+   protected:
     std::unique_ptr<RleEncoder> rleEncoder;
 
-  private:
+   private:
     RleVersion rleVersion;
   };
 
-  IntegerColumnWriter::IntegerColumnWriter(
-                           const Type& type,
-                           const StreamsFactory& factory,
-                           const WriterOptions& options) :
-                             ColumnWriter(type, factory, options),
-                             rleVersion(options.getRleVersion()) {
+  template <typename BatchType>
+  IntegerColumnWriter<BatchType>::IntegerColumnWriter(const Type& type,
+                                                      const StreamsFactory& factory,
+                                                      const WriterOptions& options)
+      : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) {
     std::unique_ptr<BufferedOutputStream> dataStream =
-      factory.createStream(proto::Stream_Kind_DATA);
-    rleEncoder = createRleEncoder(
-                                  std::move(dataStream),
-                                  true,
-                                  rleVersion,
-                                  memPool,
+        factory.createStream(proto::Stream_Kind_DATA);
+    rleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion, memPool,
                                   options.getAlignedBitpacking());
 
     if (enableIndex) {
@@ -503,15 +453,12 @@ namespace orc {
     }
   }
 
-  void IntegerColumnWriter::add(
-                                ColumnVectorBatch& rowBatch,
-                                uint64_t offset,
-                                uint64_t numValues,
-                                const char* incomingMask) {
-    const LongVectorBatch* longBatch =
-      dynamic_cast<const LongVectorBatch*>(&rowBatch);
-    if (longBatch == nullptr) {
-      throw InvalidArgument("Failed to cast to LongVectorBatch");
+  template <typename BatchType>
+  void IntegerColumnWriter<BatchType>::add(ColumnVectorBatch& rowBatch, uint64_t offset,
+                                           uint64_t numValues, const char* incomingMask) {
+    const BatchType* intBatch = dynamic_cast<const BatchType*>(&rowBatch);
+    if (intBatch == nullptr) {
+      throw InvalidArgument("Failed to cast to IntegerVectorBatch");
     }
     IntegerColumnStatisticsImpl* intStats =
         dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
@@ -521,9 +468,8 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const int64_t* data = longBatch->data.data() + offset;
-    const char* notNull = longBatch->hasNulls ?
-                          longBatch->notNull.data() + offset : nullptr;
+    const auto* data = intBatch->data.data() + offset;
+    const char* notNull = intBatch->hasNulls ? intBatch->notNull.data() + offset : nullptr;
 
     rleEncoder->add(data, numValues, notNull);
 
@@ -533,9 +479,9 @@ namespace orc {
       if (notNull == nullptr || notNull[i]) {
         ++count;
         if (enableBloomFilter) {
-          bloomFilter->addLong(data[i]);
+          bloomFilter->addLong(static_cast<int64_t>(data[i]));
         }
-        intStats->update(data[i], 1);
+        intStats->update(static_cast<int64_t>(data[i]), 1);
       }
     }
     intStats->increase(count);
@@ -544,7 +490,8 @@ namespace orc {
     }
   }
 
-  void IntegerColumnWriter::flush(std::vector<proto::Stream>& streams) {
+  template <typename BatchType>
+  void IntegerColumnWriter<BatchType>::flush(std::vector<proto::Stream>& streams) {
     ColumnWriter::flush(streams);
 
     proto::Stream stream;
@@ -554,59 +501,57 @@ namespace orc {
     streams.push_back(stream);
   }
 
-  uint64_t IntegerColumnWriter::getEstimatedSize() const {
+  template <typename BatchType>
+  uint64_t IntegerColumnWriter<BatchType>::getEstimatedSize() const {
     uint64_t size = ColumnWriter::getEstimatedSize();
     size += rleEncoder->getBufferSize();
     return size;
   }
 
-  void IntegerColumnWriter::getColumnEncoding(
-                       std::vector<proto::ColumnEncoding>& encodings) const {
+  template <typename BatchType>
+  void IntegerColumnWriter<BatchType>::getColumnEncoding(
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(rleVersion));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
 
-  void IntegerColumnWriter::recordPosition() const {
+  template <typename BatchType>
+  void IntegerColumnWriter<BatchType>::recordPosition() const {
     ColumnWriter::recordPosition();
     rleEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  template <typename BatchType>
   class ByteColumnWriter : public ColumnWriter {
-  public:
-    ByteColumnWriter(const Type& type,
-                     const StreamsFactory& factory,
-                     const WriterOptions& options);
-
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+   public:
+    ByteColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
+
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-            std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  private:
+   private:
     std::unique_ptr<ByteRleEncoder> byteRleEncoder;
   };
 
-  ByteColumnWriter::ByteColumnWriter(
-                        const Type& type,
-                        const StreamsFactory& factory,
-                        const WriterOptions& options) :
-                             ColumnWriter(type, factory, options) {
+  template <typename BatchType>
+  ByteColumnWriter<BatchType>::ByteColumnWriter(const Type& type, const StreamsFactory& factory,
+                                                const WriterOptions& options)
+      : ColumnWriter(type, factory, options) {
     std::unique_ptr<BufferedOutputStream> dataStream =
-                                  factory.createStream(proto::Stream_Kind_DATA);
+        factory.createStream(proto::Stream_Kind_DATA);
     byteRleEncoder = createByteRleEncoder(std::move(dataStream));
 
     if (enableIndex) {
@@ -614,13 +559,12 @@ namespace orc {
     }
   }
 
-  void ByteColumnWriter::add(ColumnVectorBatch& rowBatch,
-                             uint64_t offset,
-                             uint64_t numValues,
-                             const char* incomingMask) {
-    LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch);
+  template <typename BatchType>
+  void ByteColumnWriter<BatchType>::add(ColumnVectorBatch& rowBatch, uint64_t offset,
+                                        uint64_t numValues, const char* incomingMask) {
+    BatchType* byteBatch = dynamic_cast<BatchType*>(&rowBatch);
     if (byteBatch == nullptr) {
-      throw InvalidArgument("Failed to cast to LongVectorBatch");
+      throw InvalidArgument("Failed to cast to IntegerVectorBatch");
     }
     IntegerColumnStatisticsImpl* intStats =
         dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
@@ -630,9 +574,8 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    int64_t* data = byteBatch->data.data() + offset;
-    const char* notNull = byteBatch->hasNulls ?
-                          byteBatch->notNull.data() + offset : nullptr;
+    auto* data = byteBatch->data.data() + offset;
+    const char* notNull = byteBatch->hasNulls ? byteBatch->notNull.data() + offset : nullptr;
 
     char* byteData = reinterpret_cast<char*>(data);
     for (uint64_t i = 0; i < numValues; ++i) {
@@ -656,7 +599,8 @@ namespace orc {
     }
   }
 
-  void ByteColumnWriter::flush(std::vector<proto::Stream>& streams) {
+  template <typename BatchType>
+  void ByteColumnWriter<BatchType>::flush(std::vector<proto::Stream>& streams) {
     ColumnWriter::flush(streams);
 
     proto::Stream stream;
@@ -666,59 +610,59 @@ namespace orc {
     streams.push_back(stream);
   }
 
-  uint64_t ByteColumnWriter::getEstimatedSize() const {
+  template <typename BatchType>
+  uint64_t ByteColumnWriter<BatchType>::getEstimatedSize() const {
     uint64_t size = ColumnWriter::getEstimatedSize();
     size += byteRleEncoder->getBufferSize();
     return size;
   }
 
-  void ByteColumnWriter::getColumnEncoding(
-    std::vector<proto::ColumnEncoding>& encodings) const {
+  template <typename BatchType>
+  void ByteColumnWriter<BatchType>::getColumnEncoding(
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
 
-  void ByteColumnWriter::recordPosition() const {
+  template <typename BatchType>
+  void ByteColumnWriter<BatchType>::recordPosition() const {
     ColumnWriter::recordPosition();
     byteRleEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  template <typename BatchType>
   class BooleanColumnWriter : public ColumnWriter {
-  public:
-    BooleanColumnWriter(const Type& type,
-                        const StreamsFactory& factory,
+   public:
+    BooleanColumnWriter(const Type& type, const StreamsFactory& factory,
                         const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  private:
+   private:
     std::unique_ptr<ByteRleEncoder> rleEncoder;
   };
 
-  BooleanColumnWriter::BooleanColumnWriter(
-                           const Type& type,
-                           const StreamsFactory& factory,
-                           const WriterOptions& options) :
-                               ColumnWriter(type, factory, options) {
+  template <typename BatchType>
+  BooleanColumnWriter<BatchType>::BooleanColumnWriter(const Type& type,
+                                                      const StreamsFactory& factory,
+                                                      const WriterOptions& options)
+      : ColumnWriter(type, factory, options) {
     std::unique_ptr<BufferedOutputStream> dataStream =
-      factory.createStream(proto::Stream_Kind_DATA);
+        factory.createStream(proto::Stream_Kind_DATA);
     rleEncoder = createBooleanRleEncoder(std::move(dataStream));
 
     if (enableIndex) {
@@ -726,13 +670,14 @@ namespace orc {
     }
   }
 
-  void BooleanColumnWriter::add(ColumnVectorBatch& rowBatch,
-                                uint64_t offset,
-                                uint64_t numValues,
-                                const char* incomingMask) {
-    LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch);
+  template <typename BatchType>
+  void BooleanColumnWriter<BatchType>::add(ColumnVectorBatch& rowBatch, uint64_t offset,
+                                           uint64_t numValues, const char* incomingMask) {
+    BatchType* byteBatch = dynamic_cast<BatchType*>(&rowBatch);
     if (byteBatch == nullptr) {
-      throw InvalidArgument("Failed to cast to LongVectorBatch");
+      std::stringstream ss;
+      ss << "Failed to cast to " << typeid(BatchType).name();
+      throw InvalidArgument(ss.str());
     }
     BooleanColumnStatisticsImpl* boolStats =
         dynamic_cast<BooleanColumnStatisticsImpl*>(colIndexStatistics.get());
@@ -742,9 +687,8 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    int64_t* data = byteBatch->data.data() + offset;
-    const char* notNull = byteBatch->hasNulls ?
-                          byteBatch->notNull.data() + offset : nullptr;
+    auto* data = byteBatch->data.data() + offset;
+    const char* notNull = byteBatch->hasNulls ? byteBatch->notNull.data() + offset : nullptr;
 
     char* byteData = reinterpret_cast<char*>(data);
     for (uint64_t i = 0; i < numValues; ++i) {
@@ -768,7 +712,8 @@ namespace orc {
     }
   }
 
-  void BooleanColumnWriter::flush(std::vector<proto::Stream>& streams) {
+  template <typename BatchType>
+  void BooleanColumnWriter<BatchType>::flush(std::vector<proto::Stream>& streams) {
     ColumnWriter::flush(streams);
 
     proto::Stream stream;
@@ -778,65 +723,63 @@ namespace orc {
     streams.push_back(stream);
   }
 
-  uint64_t BooleanColumnWriter::getEstimatedSize() const {
+  template <typename BatchType>
+  uint64_t BooleanColumnWriter<BatchType>::getEstimatedSize() const {
     uint64_t size = ColumnWriter::getEstimatedSize();
     size += rleEncoder->getBufferSize();
     return size;
   }
 
-  void BooleanColumnWriter::getColumnEncoding(
-                       std::vector<proto::ColumnEncoding>& encodings) const {
+  template <typename BatchType>
+  void BooleanColumnWriter<BatchType>::getColumnEncoding(
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
 
-  void BooleanColumnWriter::recordPosition() const {
+  template <typename BatchType>
+  void BooleanColumnWriter<BatchType>::recordPosition() const {
     ColumnWriter::recordPosition();
     rleEncoder->recordPosition(rowIndexPosition.get());
   }
 
-  class DoubleColumnWriter : public ColumnWriter {
-  public:
-    DoubleColumnWriter(const Type& type,
-                       const StreamsFactory& factory,
-                       const WriterOptions& options,
-                       bool isFloat);
+  template <typename ValueType, typename BatchType>
+  class FloatingColumnWriter : public ColumnWriter {
+   public:
+    FloatingColumnWriter(const Type& type, const StreamsFactory& factory,
+                         const WriterOptions& options, bool isFloat);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  private:
+   private:
     bool isFloat;
     std::unique_ptr<AppendOnlyBufferedStream> dataStream;
     DataBuffer<char> buffer;
   };
 
-  DoubleColumnWriter::DoubleColumnWriter(
-                          const Type& type,
-                          const StreamsFactory& factory,
-                          const WriterOptions& options,
-                          bool isFloatType) :
-                              ColumnWriter(type, factory, options),
-                              isFloat(isFloatType),
-                              buffer(*options.getMemoryPool()) {
-    dataStream.reset(new AppendOnlyBufferedStream(
-                             factory.createStream(proto::Stream_Kind_DATA)));
+  template <typename ValueType, typename BatchType>
+  FloatingColumnWriter<ValueType, BatchType>::FloatingColumnWriter(const Type& type,
+                                                                   const StreamsFactory& factory,
+                                                                   const WriterOptions& options,
+                                                                   bool isFloatType)
+      : ColumnWriter(type, factory, options),
+        isFloat(isFloatType),
+        buffer(*options.getMemoryPool()) {
+    dataStream.reset(new AppendOnlyBufferedStream(factory.createStream(proto::Stream_Kind_DATA)));
     buffer.resize(isFloat ? 4 : 8);
 
     if (enableIndex) {
@@ -854,26 +797,24 @@ namespace orc {
     }
   }
 
-  void DoubleColumnWriter::add(ColumnVectorBatch& rowBatch,
-                               uint64_t offset,
-                               uint64_t numValues,
-                               const char* incomingMask) {
-    const DoubleVectorBatch* dblBatch =
-      dynamic_cast<const DoubleVectorBatch*>(&rowBatch);
+  template <typename ValueType, typename BatchType>
+  void FloatingColumnWriter<ValueType, BatchType>::add(ColumnVectorBatch& rowBatch, uint64_t offset,
+                                                       uint64_t numValues,
+                                                       const char* incomingMask) {
+    const BatchType* dblBatch = dynamic_cast<const BatchType*>(&rowBatch);
     if (dblBatch == nullptr) {
-      throw InvalidArgument("Failed to cast to DoubleVectorBatch");
+      throw InvalidArgument("Failed to cast to FloatingVectorBatch");
     }
     DoubleColumnStatisticsImpl* doubleStats =
-      dynamic_cast<DoubleColumnStatisticsImpl*>(colIndexStatistics.get());
+        dynamic_cast<DoubleColumnStatisticsImpl*>(colIndexStatistics.get());
     if (doubleStats == nullptr) {
       throw InvalidArgument("Failed to cast to DoubleColumnStatisticsImpl");
     }
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const double* doubleData = dblBatch->data.data() + offset;
-    const char* notNull = dblBatch->hasNulls ?
-                          dblBatch->notNull.data() + offset : nullptr;
+    const ValueType* doubleData = dblBatch->data.data() + offset;
+    const char* notNull = dblBatch->hasNulls ? dblBatch->notNull.data() + offset : nullptr;
 
     size_t bytes = isFloat ? 4 : 8;
     char* data = buffer.data();
@@ -883,14 +824,14 @@ namespace orc {
         if (isFloat) {
           encodeFloatNum<float, int32_t>(static_cast<float>(doubleData[i]), data);
         } else {
-          encodeFloatNum<double, int64_t>(doubleData[i], data);
+          encodeFloatNum<double, int64_t>(static_cast<double>(doubleData[i]), data);
         }
         dataStream->write(data, bytes);
         ++count;
         if (enableBloomFilter) {
-          bloomFilter->addDouble(doubleData[i]);
+          bloomFilter->addDouble(static_cast<double>(doubleData[i]));
         }
-        doubleStats->update(doubleData[i]);
+        doubleStats->update(static_cast<double>(doubleData[i]));
       }
     }
     doubleStats->increase(count);
@@ -899,7 +840,8 @@ namespace orc {
     }
   }
 
-  void DoubleColumnWriter::flush(std::vector<proto::Stream>& streams) {
+  template <typename ValueType, typename BatchType>
+  void FloatingColumnWriter<ValueType, BatchType>::flush(std::vector<proto::Stream>& streams) {
     ColumnWriter::flush(streams);
 
     proto::Stream stream;
@@ -909,24 +851,27 @@ namespace orc {
     streams.push_back(stream);
   }
 
-  uint64_t DoubleColumnWriter::getEstimatedSize() const {
+  template <typename ValueType, typename BatchType>
+  uint64_t FloatingColumnWriter<ValueType, BatchType>::getEstimatedSize() const {
     uint64_t size = ColumnWriter::getEstimatedSize();
     size += dataStream->getSize();
     return size;
   }
 
-  void DoubleColumnWriter::getColumnEncoding(
-                      std::vector<proto::ColumnEncoding>& encodings) const {
+  template <typename ValueType, typename BatchType>
+  void FloatingColumnWriter<ValueType, BatchType>::getColumnEncoding(
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
 
-  void DoubleColumnWriter::recordPosition() const {
+  template <typename ValueType, typename BatchType>
+  void FloatingColumnWriter<ValueType, BatchType>::recordPosition() const {
     ColumnWriter::recordPosition();
     dataStream->recordPosition(rowIndexPosition.get());
   }
@@ -935,27 +880,26 @@ namespace orc {
    * Implementation of increasing sorted string dictionary
    */
   class SortedStringDictionary {
-  public:
+   public:
     struct DictEntry {
-      DictEntry(const char * str, size_t len):data(str),length(len) {}
-      const char * data;
+      DictEntry(const char* str, size_t len) : data(str), length(len) {}
+      const char* data;
       size_t length;
     };
 
-    SortedStringDictionary():totalLength(0) {}
+    SortedStringDictionary() : totalLength(0) {}
 
     // insert a new string into dictionary, return its insertion order
-    size_t insert(const char * data, size_t len);
+    size_t insert(const char* data, size_t len);
 
     // write dictionary data & length to output buffer
-    void flush(AppendOnlyBufferedStream * dataStream,
-               RleEncoder * lengthEncoder) const;
+    void flush(AppendOnlyBufferedStream* dataStream, RleEncoder* lengthEncoder) const;
 
     // reorder input index buffer from insertion order to dictionary order
     void reorder(std::vector<int64_t>& idxBuffer) const;
 
     // get dict entries in insertion order
-    void getEntriesInInsertionOrder(std::vector<const DictEntry *>&) const;
+    void getEntriesInInsertionOrder(std::vector<const DictEntry*>&) const;
 
     // return count of entries
     size_t size() const;
@@ -965,7 +909,7 @@ namespace orc {
 
     void clear();
 
-  private:
+   private:
     struct LessThan {
       bool operator()(const DictEntry& left, const DictEntry& right) const {
         int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
@@ -989,14 +933,14 @@ namespace orc {
   };
 
   // insert a new string into dictionary, return its insertion order
-  size_t SortedStringDictionary::insert(const char * str, size_t len) {
+  size_t SortedStringDictionary::insert(const char* str, size_t len) {
     auto ret = dict.insert({DictEntry(str, len), dict.size()});
     if (ret.second) {
       // make a copy to internal storage
       data.push_back(std::vector<char>(len));
       memcpy(data.back().data(), str, len);
       // update dictionary entry to link pointer to internal storage
-      DictEntry * entry = const_cast<DictEntry *>(&(ret.first->first));
+      DictEntry* entry = const_cast<DictEntry*>(&(ret.first->first));
       entry->data = data.back().data();
       totalLength += len;
     }
@@ -1004,8 +948,8 @@ namespace orc {
   }
 
   // write dictionary data & length to output buffer
-  void SortedStringDictionary::flush(AppendOnlyBufferedStream * dataStream,
-                               RleEncoder * lengthEncoder) const {
+  void SortedStringDictionary::flush(AppendOnlyBufferedStream* dataStream,
+                                     RleEncoder* lengthEncoder) const {
     for (auto it = dict.cbegin(); it != dict.cend(); ++it) {
       dataStream->write(it->first.data, it->first.length);
       lengthEncoder->write(static_cast<int64_t>(it->first.length));
@@ -1032,14 +976,13 @@ namespace orc {
 
     // do the transformation
     for (size_t i = 0; i != idxBuffer.size(); ++i) {
-      idxBuffer[i] = static_cast<int64_t>(
-        mapping[static_cast<size_t>(idxBuffer[i])]);
+      idxBuffer[i] = static_cast<int64_t>(mapping[static_cast<size_t>(idxBuffer[i])]);
     }
   }
 
   // get dict entries in insertion order
   void SortedStringDictionary::getEntriesInInsertionOrder(
-                    std::vector<const DictEntry *>& entries) const {
+      std::vector<const DictEntry*>& entries) const {
     entries.resize(dict.size());
     for (auto it = dict.cbegin(); it != dict.cend(); ++it) {
       entries[it->second] = &(it->first);
@@ -1056,29 +999,25 @@ namespace orc {
     return totalLength;
   }
 
-  void SortedStringDictionary::clear()  {
+  void SortedStringDictionary::clear() {
     totalLength = 0;
     data.clear();
     dict.clear();
   }
 
   class StringColumnWriter : public ColumnWriter {
-  public:
-    StringColumnWriter(const Type& type,
-                       const StreamsFactory& factory,
+   public:
+    StringColumnWriter(const Type& type, const StreamsFactory& factory,
                        const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
@@ -1088,7 +1027,7 @@ namespace orc {
 
     virtual void reset() override;
 
-  private:
+   private:
     /**
      * dictionary related functions
      */
@@ -1098,7 +1037,7 @@ namespace orc {
     void deleteDictStreams();
     void fallbackToDirectEncoding();
 
-  protected:
+   protected:
     RleVersion rleVersion;
     bool useCompression;
     const StreamsFactory& streamsFactory;
@@ -1128,18 +1067,16 @@ namespace orc {
     mutable std::vector<size_t> startOfRowGroups;
   };
 
-  StringColumnWriter::StringColumnWriter(
-                          const Type& type,
-                          const StreamsFactory& factory,
-                          const WriterOptions& options) :
-                              ColumnWriter(type, factory, options),
-                              rleVersion(options.getRleVersion()),
-                              useCompression(options.getCompression() != CompressionKind_NONE),
-                              streamsFactory(factory),
-                              alignedBitPacking(options.getAlignedBitpacking()),
-                              doneDictionaryCheck(false),
-                              useDictionary(options.getEnableDictionary()),
-                              dictSizeThreshold(options.getDictionaryKeySizeThreshold()){
+  StringColumnWriter::StringColumnWriter(const Type& type, const StreamsFactory& factory,
+                                         const WriterOptions& options)
+      : ColumnWriter(type, factory, options),
+        rleVersion(options.getRleVersion()),
+        useCompression(options.getCompression() != CompressionKind_NONE),
+        streamsFactory(factory),
+        alignedBitPacking(options.getAlignedBitpacking()),
+        doneDictionaryCheck(false),
+        useDictionary(options.getEnableDictionary()),
+        dictSizeThreshold(options.getDictionaryKeySizeThreshold()) {
     if (type.getKind() == TypeKind::BINARY) {
       useDictionary = false;
       doneDictionaryCheck = true;
@@ -1157,12 +1094,9 @@ namespace orc {
     }
   }
 
-  void StringColumnWriter::add(ColumnVectorBatch& rowBatch,
-                               uint64_t offset,
-                               uint64_t numValues,
+  void StringColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                const char* incomingMask) {
-    const StringVectorBatch* stringBatch =
-      dynamic_cast<const StringVectorBatch*>(&rowBatch);
+    const StringVectorBatch* stringBatch = dynamic_cast<const StringVectorBatch*>(&rowBatch);
     if (stringBatch == nullptr) {
       throw InvalidArgument("Failed to cast to StringVectorBatch");
     }
@@ -1175,12 +1109,11 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    char *const * data = stringBatch->data.data() + offset;
+    char* const* data = stringBatch->data.data() + offset;
     const int64_t* length = stringBatch->length.data() + offset;
-    const char* notNull = stringBatch->hasNulls ?
-                          stringBatch->notNull.data() + offset : nullptr;
+    const char* notNull = stringBatch->hasNulls ? stringBatch->notNull.data() + offset : nullptr;
 
-    if (!useDictionary){
+    if (!useDictionary) {
       directLengthEncoder->add(length, numValues, notNull);
     }
 
@@ -1259,21 +1192,18 @@ namespace orc {
     return size;
   }
 
-  void StringColumnWriter::getColumnEncoding(
-    std::vector<proto::ColumnEncoding>& encodings) const {
+  void StringColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     if (!useDictionary) {
-      encoding.set_kind(rleVersion == RleVersion_1 ?
-                        proto::ColumnEncoding_Kind_DIRECT :
-                        proto::ColumnEncoding_Kind_DIRECT_V2);
+      encoding.set_kind(rleVersion == RleVersion_1 ? proto::ColumnEncoding_Kind_DIRECT
+                                                   : proto::ColumnEncoding_Kind_DIRECT_V2);
     } else {
-      encoding.set_kind(rleVersion == RleVersion_1 ?
-                        proto::ColumnEncoding_Kind_DICTIONARY :
-                        proto::ColumnEncoding_Kind_DICTIONARY_V2);
+      encoding.set_kind(rleVersion == RleVersion_1 ? proto::ColumnEncoding_Kind_DICTIONARY
+                                                   : proto::ColumnEncoding_Kind_DICTIONARY_V2);
     }
-    encoding.set_dictionarysize(static_cast<uint32_t>(dictionary.size()));
+    encoding.set_dictionary_size(static_cast<uint32_t>(dictionary.size()));
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
@@ -1292,8 +1222,9 @@ namespace orc {
 
   bool StringColumnWriter::checkDictionaryKeyRatio() {
     if (!doneDictionaryCheck) {
-      useDictionary = dictionary.size() <= static_cast<size_t>(
-        static_cast<double>(dictionary.idxInDictBuffer.size()) * dictSizeThreshold);
+      useDictionary = dictionary.size() <=
+                      static_cast<size_t>(static_cast<double>(dictionary.idxInDictBuffer.size()) *
+                                          dictSizeThreshold);
       doneDictionaryCheck = true;
     }
 
@@ -1320,33 +1251,24 @@ namespace orc {
 
   void StringColumnWriter::createDirectStreams() {
     std::unique_ptr<BufferedOutputStream> directLengthStream =
-      streamsFactory.createStream(proto::Stream_Kind_LENGTH);
-    directLengthEncoder = createRleEncoder(std::move(directLengthStream),
-                                           false,
-                                           rleVersion,
-                                           memPool,
-                                           alignedBitPacking);
-    directDataStream.reset(new AppendOnlyBufferedStream(
-      streamsFactory.createStream(proto::Stream_Kind_DATA)));
+        streamsFactory.createStream(proto::Stream_Kind_LENGTH);
+    directLengthEncoder = createRleEncoder(std::move(directLengthStream), false, rleVersion,
+                                           memPool, alignedBitPacking);
+    directDataStream.reset(
+        new AppendOnlyBufferedStream(streamsFactory.createStream(proto::Stream_Kind_DATA)));
   }
 
   void StringColumnWriter::createDictStreams() {
     std::unique_ptr<BufferedOutputStream> dictDataStream =
-      streamsFactory.createStream(proto::Stream_Kind_DATA);
-    dictDataEncoder = createRleEncoder(std::move(dictDataStream),
-                                       false,
-                                       rleVersion,
-                                       memPool,
-                                       alignedBitPacking);
+        streamsFactory.createStream(proto::Stream_Kind_DATA);
+    dictDataEncoder =
+        createRleEncoder(std::move(dictDataStream), false, rleVersion, memPool, alignedBitPacking);
     std::unique_ptr<BufferedOutputStream> dictLengthStream =
-      streamsFactory.createStream(proto::Stream_Kind_LENGTH);
-    dictLengthEncoder = createRleEncoder(std::move(dictLengthStream),
-                                         false,
-                                         rleVersion,
-                                         memPool,
+        streamsFactory.createStream(proto::Stream_Kind_LENGTH);
+    dictLengthEncoder = createRleEncoder(std::move(dictLengthStream), false, rleVersion, memPool,
                                          alignedBitPacking);
     dictStream.reset(new AppendOnlyBufferedStream(
-      streamsFactory.createStream(proto::Stream_Kind_DICTIONARY_DATA)));
+        streamsFactory.createStream(proto::Stream_Kind_DICTIONARY_DATA)));
   }
 
   void StringColumnWriter::deleteDictStreams() {
@@ -1360,7 +1282,7 @@ namespace orc {
   }
 
   void StringColumnWriter::writeDictionary() {
-    if (useDictionary  && !doneDictionaryCheck) {
+    if (useDictionary && !doneDictionaryCheck) {
       // when index is disabled, dictionary check happens while writing 1st stripe
       if (!checkDictionaryKeyRatio()) {
         fallbackToDirectEncoding();
@@ -1376,7 +1298,7 @@ namespace orc {
       dictionary.reorder(dictionary.idxInDictBuffer);
 
       // write data sequences
-      int64_t * data = dictionary.idxInDictBuffer.data();
+      int64_t* data = dictionary.idxInDictBuffer.data();
       if (enableIndex) {
         size_t prevOffset = 0;
         for (size_t i = 0; i < startOfRowGroups.size(); ++i) {
@@ -1386,9 +1308,9 @@ namespace orc {
 
           // update index positions
           int rowGroupId = static_cast<int>(i);
-          proto::RowIndexEntry* indexEntry =
-            (rowGroupId < rowIndex->entry_size()) ?
-            rowIndex->mutable_entry(rowGroupId) : rowIndexEntry.get();
+          proto::RowIndexEntry* indexEntry = (rowGroupId < rowIndex->entry_size())
+                                                 ? rowIndex->mutable_entry(rowGroupId)
+                                                 : rowIndexEntry.get();
 
           // add positions for direct streams
           RowIndexPositionRecorder recorder(*indexEntry);
@@ -1397,8 +1319,7 @@ namespace orc {
           prevOffset = offset;
         }
 
-        dictDataEncoder->add(data + prevOffset,
-                             dictionary.idxInDictBuffer.size() - prevOffset,
+        dictDataEncoder->add(data + prevOffset, dictionary.idxInDictBuffer.size() - prevOffset,
                              nullptr);
       } else {
         dictDataEncoder->add(data, dictionary.idxInDictBuffer.size(), nullptr);
@@ -1412,18 +1333,18 @@ namespace orc {
     if (enableIndex) {
       // fallback happens at the 1st row group;
       // simply complete positions for direct streams
-      proto::RowIndexEntry * indexEntry = rowIndexEntry.get();
+      proto::RowIndexEntry* indexEntry = rowIndexEntry.get();
       RowIndexPositionRecorder recorder(*indexEntry);
       directDataStream->recordPosition(&recorder);
       directLengthEncoder->recordPosition(&recorder);
     }
 
     // get dictionary entries in insertion order
-    std::vector<const SortedStringDictionary::DictEntry *> entries;
+    std::vector<const SortedStringDictionary::DictEntry*> entries;
     dictionary.getEntriesInInsertionOrder(entries);
 
     // store each length of the data into a vector
-    const SortedStringDictionary::DictEntry * dictEntry = nullptr;
+    const SortedStringDictionary::DictEntry* dictEntry = nullptr;
     for (uint64_t i = 0; i != dictionary.idxInDictBuffer.size(); ++i) {
       // write one row data in direct encoding
       dictEntry = entries[static_cast<size_t>(dictionary.idxInDictBuffer[i])];
@@ -1438,7 +1359,7 @@ namespace orc {
     /**
      * Counts how many utf-8 chars of the input data
      */
-    static uint64_t charLength(const char * data, uint64_t length) {
+    static uint64_t charLength(const char* data, uint64_t length) {
       uint64_t chars = 0;
       for (uint64_t i = 0; i < length; i++) {
         if (isUtfStartByte(data[i])) {
@@ -1458,9 +1379,7 @@ namespace orc {
      * @param data the bytes of UTF-8
      * @param length the length of data to truncate
      */
-    static uint64_t truncateBytesTo(uint64_t maxCharLength,
-                                    const char * data,
-                                    uint64_t length) {
+    static uint64_t truncateBytesTo(uint64_t maxCharLength, const char* data, uint64_t length) {
       uint64_t chars = 0;
       if (length <= maxCharLength) {
         return length;
@@ -1490,8 +1409,8 @@ namespace orc {
      * @param from the first byte location
      * @param until the last byte location
      * @return the index of the last character
-    */
-    static uint64_t findLastCharacter(const char * text, uint64_t from, uint64_t until) {
+     */
+    static uint64_t findLastCharacter(const char* text, uint64_t from, uint64_t until) {
       uint64_t posn = until;
       /* we don't expect characters more than 5 bytes */
       while (posn >= from) {
@@ -1501,36 +1420,29 @@ namespace orc {
         posn -= 1;
       }
       /* beginning of a valid char not found */
-      throw std::logic_error(
-        "Could not truncate string, beginning of a valid char not found");
+      throw std::logic_error("Could not truncate string, beginning of a valid char not found");
     }
   };
 
   class CharColumnWriter : public StringColumnWriter {
-  public:
-    CharColumnWriter(const Type& type,
-                     const StreamsFactory& factory,
-                     const WriterOptions& options) :
-                         StringColumnWriter(type, factory, options),
-                         maxLength(type.getMaximumLength()),
-                         padBuffer(*options.getMemoryPool()) {
+   public:
+    CharColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options)
+        : StringColumnWriter(type, factory, options),
+          maxLength(type.getMaximumLength()),
+          padBuffer(*options.getMemoryPool()) {
       // utf-8 is currently 4 bytes long, but it could be up to 6
       padBuffer.resize(maxLength * 6);
     }
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
-  private:
+   private:
     uint64_t maxLength;
     DataBuffer<char> padBuffer;
   };
 
-  void CharColumnWriter::add(ColumnVectorBatch& rowBatch,
-                             uint64_t offset,
-                             uint64_t numValues,
+  void CharColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                              const char* incomingMask) {
     StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch);
     if (charsBatch == nullptr) {
@@ -1547,26 +1459,24 @@ namespace orc {
 
     char** data = charsBatch->data.data() + offset;
     int64_t* length = charsBatch->length.data() + offset;
-    const char* notNull = charsBatch->hasNulls ?
-                          charsBatch->notNull.data() + offset : nullptr;
+    const char* notNull = charsBatch->hasNulls ? charsBatch->notNull.data() + offset : nullptr;
 
     uint64_t count = 0;
     for (uint64_t i = 0; i < numValues; ++i) {
       if (!notNull || notNull[i]) {
-        const char * charData = nullptr;
+        const char* charData = nullptr;
         uint64_t originLength = static_cast<uint64_t>(length[i]);
         uint64_t charLength = Utf8Utils::charLength(data[i], originLength);
         if (charLength >= maxLength) {
           charData = data[i];
-          length[i] = static_cast<int64_t>(
-            Utf8Utils::truncateBytesTo(maxLength, data[i], originLength));
+          length[i] =
+              static_cast<int64_t>(Utf8Utils::truncateBytesTo(maxLength, data[i], originLength));
         } else {
           charData = padBuffer.data();
           // the padding is exactly 1 byte per char
           length[i] = length[i] + static_cast<int64_t>(maxLength - charLength);
           memcpy(padBuffer.data(), data[i], originLength);
-          memset(padBuffer.data() + originLength,
-                 ' ',
+          memset(padBuffer.data() + originLength, ' ',
                  static_cast<size_t>(length[i]) - originLength);
         }
 
@@ -1596,27 +1506,21 @@ namespace orc {
   }
 
   class VarCharColumnWriter : public StringColumnWriter {
-  public:
-    VarCharColumnWriter(const Type& type,
-                        const StreamsFactory& factory,
-                        const WriterOptions& options) :
-                            StringColumnWriter(type, factory, options),
-                            maxLength(type.getMaximumLength()) {
+   public:
+    VarCharColumnWriter(const Type& type, const StreamsFactory& factory,
+                        const WriterOptions& options)
+        : StringColumnWriter(type, factory, options), maxLength(type.getMaximumLength()) {
       // PASS
     }
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
-  private:
+   private:
     uint64_t maxLength;
   };
 
-  void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch,
-                                uint64_t offset,
-                                uint64_t numValues,
+  void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                 const char* incomingMask) {
     StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch);
     if (charsBatch == nullptr) {
@@ -1633,14 +1537,13 @@ namespace orc {
 
     char* const* data = charsBatch->data.data() + offset;
     int64_t* length = charsBatch->length.data() + offset;
-    const char* notNull = charsBatch->hasNulls ?
-                          charsBatch->notNull.data() + offset : nullptr;
+    const char* notNull = charsBatch->hasNulls ? charsBatch->notNull.data() + offset : nullptr;
 
     uint64_t count = 0;
     for (uint64_t i = 0; i < numValues; ++i) {
       if (!notNull || notNull[i]) {
-        uint64_t itemLength = Utf8Utils::truncateBytesTo(
-          maxLength, data[i], static_cast<uint64_t>(length[i]));
+        uint64_t itemLength =
+            Utf8Utils::truncateBytesTo(maxLength, data[i], static_cast<uint64_t>(length[i]));
         length[i] = static_cast<int64_t>(itemLength);
 
         if (useDictionary) {
@@ -1669,23 +1572,18 @@ namespace orc {
   }
 
   class BinaryColumnWriter : public StringColumnWriter {
-  public:
-    BinaryColumnWriter(const Type& type,
-                       const StreamsFactory& factory,
-                       const WriterOptions& options) :
-                           StringColumnWriter(type, factory, options) {
+   public:
+    BinaryColumnWriter(const Type& type, const StreamsFactory& factory,
+                       const WriterOptions& options)
+        : StringColumnWriter(type, factory, options) {
       // PASS
     }
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
   };
 
-  void BinaryColumnWriter::add(ColumnVectorBatch& rowBatch,
-                               uint64_t offset,
-                               uint64_t numValues,
+  void BinaryColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                const char* incomingMask) {
     StringVectorBatch* binBatch = dynamic_cast<StringVectorBatch*>(&rowBatch);
     if (binBatch == nullptr) {
@@ -1702,8 +1600,7 @@ namespace orc {
 
     char** data = binBatch->data.data() + offset;
     int64_t* length = binBatch->length.data() + offset;
-    const char* notNull = binBatch->hasNulls ?
-                          binBatch->notNull.data() + offset : nullptr;
+    const char* notNull = binBatch->hasNulls ? binBatch->notNull.data() + offset : nullptr;
 
     uint64_t count = 0;
     for (uint64_t i = 0; i < numValues; ++i) {
@@ -1726,60 +1623,43 @@ namespace orc {
   }
 
   class TimestampColumnWriter : public ColumnWriter {
-  public:
-    TimestampColumnWriter(const Type& type,
-                          const StreamsFactory& factory,
-                          const WriterOptions& options,
-                          bool isInstantType);
-
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+   public:
+    TimestampColumnWriter(const Type& type, const StreamsFactory& factory,
+                          const WriterOptions& options, bool isInstantType);
+
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  protected:
+   protected:
     std::unique_ptr<RleEncoder> secRleEncoder, nanoRleEncoder;
 
-  private:
+   private:
     RleVersion rleVersion;
-    const Timezone& timezone;
+    const Timezone* timezone;
     const bool isUTC;
   };
 
-  TimestampColumnWriter::TimestampColumnWriter(
-                             const Type& type,
-                             const StreamsFactory& factory,
-                             const WriterOptions& options,
-                             bool isInstantType) :
-                                 ColumnWriter(type, factory, options),
-                                 rleVersion(options.getRleVersion()),
-                                 timezone(isInstantType ?
-                                          getTimezoneByName("GMT") :
-                                          options.getTimezone()),
-                                 isUTC(isInstantType ||
-                                       options.getTimezoneName() == "GMT") {
+  TimestampColumnWriter::TimestampColumnWriter(const Type& type, const StreamsFactory& factory,
+                                               const WriterOptions& options, bool isInstantType)
+      : ColumnWriter(type, factory, options),
+        rleVersion(options.getRleVersion()),
+        timezone(isInstantType ? &getTimezoneByName("GMT") : &options.getTimezone()),
+        isUTC(isInstantType || options.getTimezoneName() == "GMT") {
     std::unique_ptr<BufferedOutputStream> dataStream =
         factory.createStream(proto::Stream_Kind_DATA);
     std::unique_ptr<BufferedOutputStream> secondaryStream =
         factory.createStream(proto::Stream_Kind_SECONDARY);
-    secRleEncoder = createRleEncoder(std::move(dataStream),
-                                     true,
-                                     rleVersion,
-                                     memPool,
+    secRleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion, memPool,
                                      options.getAlignedBitpacking());
-    nanoRleEncoder = createRleEncoder(std::move(secondaryStream),
-                                      false,
-                                      rleVersion,
-                                      memPool,
+    nanoRleEncoder = createRleEncoder(std::move(secondaryStream), false, rleVersion, memPool,
                                       options.getAlignedBitpacking());
 
     if (enableIndex) {
@@ -1808,12 +1688,9 @@ namespace orc {
     }
   }
 
-  void TimestampColumnWriter::add(ColumnVectorBatch& rowBatch,
-                                  uint64_t offset,
-                                  uint64_t numValues,
+  void TimestampColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                   const char* incomingMask) {
-    TimestampVectorBatch* tsBatch =
-      dynamic_cast<TimestampVectorBatch*>(&rowBatch);
+    TimestampVectorBatch* tsBatch = dynamic_cast<TimestampVectorBatch*>(&rowBatch);
     if (tsBatch == nullptr) {
       throw InvalidArgument("Failed to cast to TimestampVectorBatch");
     }
@@ -1826,10 +1703,9 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const char* notNull = tsBatch->hasNulls ?
-                          tsBatch->notNull.data() + offset : nullptr;
-    int64_t *secs = tsBatch->data.data() + offset;
-    int64_t *nanos = tsBatch->nanoseconds.data() + offset;
+    const char* notNull = tsBatch->hasNulls ? tsBatch->notNull.data() + offset : nullptr;
+    int64_t* secs = tsBatch->data.data() + offset;
+    int64_t* nanos = tsBatch->nanoseconds.data() + offset;
 
     uint64_t count = 0;
     for (uint64_t i = 0; i < numValues; ++i) {
@@ -1837,7 +1713,7 @@ namespace orc {
         // TimestampVectorBatch already stores data in UTC
         int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000;
         if (!isUTC) {
-          millsUTC = timezone.convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000;
+          millsUTC = timezone->convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000;
         }
         ++count;
         if (enableBloomFilter) {
@@ -1849,7 +1725,7 @@ namespace orc {
           secs[i] += 1;
         }
 
-        secs[i] -= timezone.getEpoch();
+        secs[i] -= timezone->getEpoch();
         nanos[i] = formatNano(nanos[i]);
       }
     }
@@ -1886,12 +1762,12 @@ namespace orc {
   }
 
   void TimestampColumnWriter::getColumnEncoding(
-    std::vector<proto::ColumnEncoding>& encodings) const {
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(rleVersion));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
@@ -1902,32 +1778,23 @@ namespace orc {
     nanoRleEncoder->recordPosition(rowIndexPosition.get());
   }
 
-  class DateColumnWriter : public IntegerColumnWriter {
-  public:
-    DateColumnWriter(const Type& type,
-                     const StreamsFactory& factory,
-                     const WriterOptions& options);
+  class DateColumnWriter : public IntegerColumnWriter<LongVectorBatch> {
+   public:
+    DateColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
   };
 
-  DateColumnWriter::DateColumnWriter(
-                        const Type &type,
-                        const StreamsFactory &factory,
-                        const WriterOptions &options) :
-                            IntegerColumnWriter(type, factory, options) {
+  DateColumnWriter::DateColumnWriter(const Type& type, const StreamsFactory& factory,
+                                     const WriterOptions& options)
+      : IntegerColumnWriter<LongVectorBatch>(type, factory, options) {
     // PASS
   }
 
-  void DateColumnWriter::add(ColumnVectorBatch& rowBatch,
-                             uint64_t offset,
-                             uint64_t numValues,
+  void DateColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                              const char* incomingMask) {
-    const LongVectorBatch* longBatch =
-      dynamic_cast<const LongVectorBatch*>(&rowBatch);
+    const LongVectorBatch* longBatch = dynamic_cast<const LongVectorBatch*>(&rowBatch);
     if (longBatch == nullptr) {
       throw InvalidArgument("Failed to cast to LongVectorBatch");
     }
@@ -1941,8 +1808,7 @@ namespace orc {
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
     const int64_t* data = longBatch->data.data() + offset;
-    const char* notNull = longBatch->hasNulls ?
-                          longBatch->notNull.data() + offset : nullptr;
+    const char* notNull = longBatch->hasNulls ? longBatch->notNull.data() + offset : nullptr;
 
     rleEncoder->add(data, numValues, notNull);
 
@@ -1963,55 +1829,45 @@ namespace orc {
   }
 
   class Decimal64ColumnWriter : public ColumnWriter {
-  public:
+   public:
     static const uint32_t MAX_PRECISION_64 = 18;
     static const uint32_t MAX_PRECISION_128 = 38;
 
-    Decimal64ColumnWriter(const Type& type,
-                          const StreamsFactory& factory,
+    Decimal64ColumnWriter(const Type& type, const StreamsFactory& factory,
                           const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  protected:
+   protected:
     RleVersion rleVersion;
     uint64_t precision;
     uint64_t scale;
     std::unique_ptr<AppendOnlyBufferedStream> valueStream;
     std::unique_ptr<RleEncoder> scaleEncoder;
 
-  private:
+   private:
     char buffer[10];
   };
 
-  Decimal64ColumnWriter::Decimal64ColumnWriter(
-                             const Type& type,
-                             const StreamsFactory& factory,
-                             const WriterOptions& options) :
-                                 ColumnWriter(type, factory, options),
-                                 rleVersion(options.getRleVersion()),
-                                 precision(type.getPrecision()),
-                                 scale(type.getScale()) {
-    valueStream.reset(new AppendOnlyBufferedStream(
-        factory.createStream(proto::Stream_Kind_DATA)));
+  Decimal64ColumnWriter::Decimal64ColumnWriter(const Type& type, const StreamsFactory& factory,
+                                               const WriterOptions& options)
+      : ColumnWriter(type, factory, options),
+        rleVersion(options.getRleVersion()),
+        precision(type.getPrecision()),
+        scale(type.getScale()) {
+    valueStream.reset(new AppendOnlyBufferedStream(factory.createStream(proto::Stream_Kind_DATA)));
     std::unique_ptr<BufferedOutputStream> scaleStream =
         factory.createStream(proto::Stream_Kind_SECONDARY);
-    scaleEncoder = createRleEncoder(std::move(scaleStream),
-                                    true,
-                                    rleVersion,
-                                    memPool,
+    scaleEncoder = createRleEncoder(std::move(scaleStream), true, rleVersion, memPool,
                                     options.getAlignedBitpacking());
 
     if (enableIndex) {
@@ -2019,26 +1875,22 @@ namespace orc {
     }
   }
 
-  void Decimal64ColumnWriter::add(ColumnVectorBatch& rowBatch,
-                                  uint64_t offset,
-                                  uint64_t numValues,
+  void Decimal64ColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                   const char* incomingMask) {
-    const Decimal64VectorBatch* decBatch =
-      dynamic_cast<const Decimal64VectorBatch*>(&rowBatch);
+    const Decimal64VectorBatch* decBatch = dynamic_cast<const Decimal64VectorBatch*>(&rowBatch);
     if (decBatch == nullptr) {
       throw InvalidArgument("Failed to cast to Decimal64VectorBatch");
     }
 
     DecimalColumnStatisticsImpl* decStats =
-      dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
+        dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
     if (decStats == nullptr) {
       throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl");
     }
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const char* notNull = decBatch->hasNulls ?
-                          decBatch->notNull.data() + offset : nullptr;
+    const char* notNull = decBatch->hasNulls ? decBatch->notNull.data() + offset : nullptr;
     const int64_t* values = decBatch->values.data() + offset;
 
     uint64_t count = 0;
@@ -2059,10 +1911,8 @@ namespace orc {
         valueStream->write(buffer, static_cast<size_t>(data - buffer));
         ++count;
         if (enableBloomFilter) {
-          std::string decimal = Decimal(
-            values[i], static_cast<int32_t>(scale)).toString(true);
-          bloomFilter->addBytes(
-            decimal.c_str(), static_cast<int64_t>(decimal.size()));
+          std::string decimal = Decimal(values[i], static_cast<int32_t>(scale)).toString(true);
+          bloomFilter->addBytes(decimal.c_str(), static_cast<int64_t>(decimal.size()));
         }
         decStats->update(Decimal(values[i], static_cast<int32_t>(scale)));
       }
@@ -2099,12 +1949,12 @@ namespace orc {
   }
 
   void Decimal64ColumnWriter::getColumnEncoding(
-    std::vector<proto::ColumnEncoding>& encodings) const {
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(rleVersion));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
@@ -2116,44 +1966,35 @@ namespace orc {
   }
 
   class Decimal64ColumnWriterV2 : public ColumnWriter {
-  public:
-    Decimal64ColumnWriterV2(const Type& type,
-                            const StreamsFactory& factory,
+   public:
+    Decimal64ColumnWriterV2(const Type& type, const StreamsFactory& factory,
                             const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-        std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
     virtual void recordPosition() const override;
 
-  protected:
+   protected:
     uint64_t precision;
     uint64_t scale;
     std::unique_ptr<RleEncoder> valueEncoder;
   };
 
-  Decimal64ColumnWriterV2::Decimal64ColumnWriterV2(
-                               const Type& type,
-                               const StreamsFactory& factory,
-                               const WriterOptions& options) :
-                                   ColumnWriter(type, factory, options),
-                                   precision(type.getPrecision()),
-                                   scale(type.getScale()) {
+  Decimal64ColumnWriterV2::Decimal64ColumnWriterV2(const Type& type, const StreamsFactory& factory,
+                                                   const WriterOptions& options)
+      : ColumnWriter(type, factory, options),
+        precision(type.getPrecision()),
+        scale(type.getScale()) {
     std::unique_ptr<BufferedOutputStream> dataStream =
         factory.createStream(proto::Stream_Kind_DATA);
-    valueEncoder = createRleEncoder(std::move(dataStream),
-                                    true,
-                                    RleVersion_2,
-                                    memPool,
+    valueEncoder = createRleEncoder(std::move(dataStream), true, RleVersion_2, memPool,
                                     options.getAlignedBitpacking());
 
     if (enableIndex) {
@@ -2161,18 +2002,15 @@ namespace orc {
     }
   }
 
-  void Decimal64ColumnWriterV2::add(ColumnVectorBatch& rowBatch,
-                                    uint64_t offset,
-                                    uint64_t numValues,
-                                    const char* incomingMask) {
-    const Decimal64VectorBatch* decBatch =
-      dynamic_cast<const Decimal64VectorBatch*>(&rowBatch);
+  void Decimal64ColumnWriterV2::add(ColumnVectorBatch& rowBatch, uint64_t offset,
+                                    uint64_t numValues, const char* incomingMask) {
+    const Decimal64VectorBatch* decBatch = dynamic_cast<const Decimal64VectorBatch*>(&rowBatch);
     if (decBatch == nullptr) {
       throw InvalidArgument("Failed to cast to Decimal64VectorBatch");
     }
 
     DecimalColumnStatisticsImpl* decStats =
-      dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
+        dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
     if (decStats == nullptr) {
       throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl");
     }
@@ -2180,8 +2018,7 @@ namespace orc {
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
     const int64_t* data = decBatch->values.data() + offset;
-    const char* notNull = decBatch->hasNulls ?
-                          decBatch->notNull.data() + offset : nullptr;
+    const char* notNull = decBatch->hasNulls ? decBatch->notNull.data() + offset : nullptr;
 
     valueEncoder->add(data, numValues, notNull);
 
@@ -2190,10 +2027,8 @@ namespace orc {
       if (!notNull || notNull[i]) {
         ++count;
         if (enableBloomFilter) {
-          std::string decimal = Decimal(
-            data[i], static_cast<int32_t>(scale)).toString(true);
-          bloomFilter->addBytes(
-            decimal.c_str(), static_cast<int64_t>(decimal.size()));
+          std::string decimal = Decimal(data[i], static_cast<int32_t>(scale)).toString(true);
+          bloomFilter->addBytes(decimal.c_str(), static_cast<int64_t>(decimal.size()));
         }
         decStats->update(Decimal(data[i], static_cast<int32_t>(scale)));
       }
@@ -2221,12 +2056,12 @@ namespace orc {
   }
 
   void Decimal64ColumnWriterV2::getColumnEncoding(
-    std::vector<proto::ColumnEncoding>& encodings) const {
+      std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(RleVersion_2));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
   }
@@ -2237,25 +2072,20 @@ namespace orc {
   }
 
   class Decimal128ColumnWriter : public Decimal64ColumnWriter {
-  public:
-    Decimal128ColumnWriter(const Type& type,
-                           const StreamsFactory& factory,
+   public:
+    Decimal128ColumnWriter(const Type& type, const StreamsFactory& factory,
                            const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
-  private:
+   private:
     char buffer[20];
   };
 
-  Decimal128ColumnWriter::Decimal128ColumnWriter(
-                              const Type& type,
-                              const StreamsFactory& factory,
-                              const WriterOptions& options) :
-                                Decimal64ColumnWriter(type, factory, options) {
+  Decimal128ColumnWriter::Decimal128ColumnWriter(const Type& type, const StreamsFactory& factory,
+                                                 const WriterOptions& options)
+      : Decimal64ColumnWriter(type, factory, options) {
     // PASS
   }
 
@@ -2272,26 +2102,22 @@ namespace orc {
     return val;
   }
 
-  void Decimal128ColumnWriter::add(ColumnVectorBatch& rowBatch,
-                                   uint64_t offset,
-                                   uint64_t numValues,
+  void Decimal128ColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                                    const char* incomingMask) {
-    const Decimal128VectorBatch* decBatch =
-      dynamic_cast<const Decimal128VectorBatch*>(&rowBatch);
+    const Decimal128VectorBatch* decBatch = dynamic_cast<const Decimal128VectorBatch*>(&rowBatch);
     if (decBatch == nullptr) {
       throw InvalidArgument("Failed to cast to Decimal128VectorBatch");
     }
 
     DecimalColumnStatisticsImpl* decStats =
-      dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
+        dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
     if (decStats == nullptr) {
       throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl");
     }
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const char* notNull = decBatch->hasNulls ?
-                          decBatch->notNull.data() + offset : nullptr;
+    const char* notNull = decBatch->hasNulls ? decBatch->notNull.data() + offset : nullptr;
     const Int128* values = decBatch->values.data() + offset;
 
     // The current encoding of decimal columns stores the integer representation
@@ -2314,10 +2140,8 @@ namespace orc {
 
         ++count;
         if (enableBloomFilter) {
-          std::string decimal = Decimal(
-            values[i], static_cast<int32_t>(scale)).toString(true);
-          bloomFilter->addBytes(
-            decimal.c_str(), static_cast<int64_t>(decimal.size()));
+          std::string decimal = Decimal(values[i], static_cast<int32_t>(scale)).toString(true);
+          bloomFilter->addBytes(decimal.c_str(), static_cast<int64_t>(decimal.size()));
         }
         decStats->update(Decimal(values[i], static_cast<int32_t>(scale)));
       }
@@ -2331,29 +2155,22 @@ namespace orc {
   }
 
   class ListColumnWriter : public ColumnWriter {
-  public:
-    ListColumnWriter(const Type& type,
-                     const StreamsFactory& factory,
-                     const WriterOptions& options);
+   public:
+    ListColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
     ~ListColumnWriter() override;
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-      std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
-    virtual void getStripeStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
-    virtual void getFileStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
     virtual void mergeStripeStatsIntoFileStats() override;
 
@@ -2361,8 +2178,7 @@ namespace orc {
 
     virtual void createRowIndexEntry() override;
 
-    virtual void writeIndex(
-      std::vector<proto::Stream> &streams) const override;
+    virtual void writeIndex(std::vector<proto::Stream>& streams) const override;
 
     virtual void recordPosition() const override;
 
@@ -2370,24 +2186,18 @@ namespace orc {
 
     virtual void reset() override;
 
-  private:
+   private:
     std::unique_ptr<RleEncoder> lengthEncoder;
     RleVersion rleVersion;
     std::unique_ptr<ColumnWriter> child;
   };
 
-  ListColumnWriter::ListColumnWriter(const Type& type,
-                                     const StreamsFactory& factory,
-                                     const WriterOptions& options) :
-                                       ColumnWriter(type, factory, options),
-                                       rleVersion(options.getRleVersion()){
-
+  ListColumnWriter::ListColumnWriter(const Type& type, const StreamsFactory& factory,
+                                     const WriterOptions& options)
+      : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) {
     std::unique_ptr<BufferedOutputStream> lengthStream =
-      factory.createStream(proto::Stream_Kind_LENGTH);
-    lengthEncoder = createRleEncoder(std::move(lengthStream),
-                                     false,
-                                     rleVersion,
-                                     memPool,
+        factory.createStream(proto::Stream_Kind_LENGTH);
+    lengthEncoder = createRleEncoder(std::move(lengthStream), false, rleVersion, memPool,
                                      options.getAlignedBitpacking());
 
     if (type.getSubtypeCount() == 1) {
@@ -2403,9 +2213,7 @@ namespace orc {
     // PASS
   }
 
-  void ListColumnWriter::add(ColumnVectorBatch& rowBatch,
-                             uint64_t offset,
-                             uint64_t numValues,
+  void ListColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                              const char* incomingMask) {
     ListVectorBatch* listBatch = dynamic_cast<ListVectorBatch*>(&rowBatch);
     if (listBatch == nullptr) {
@@ -2420,8 +2228,7 @@ namespace orc {
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
     int64_t* offsets = listBatch->offsets.data() + offset;
-    const char* notNull = listBatch->hasNulls ?
-                          listBatch->notNull.data() + offset : nullptr;
+    const char* notNull = listBatch->hasNulls ? listBatch->notNull.data() + offset : nullptr;
 
     uint64_t elemOffset = static_cast<uint64_t>(offsets[0]);
     uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]);
@@ -2473,7 +2280,7 @@ namespace orc {
     }
   }
 
-  void ListColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const {
+  void ListColumnWriter::writeIndex(std::vector<proto::Stream>& streams) const {
     ColumnWriter::writeIndex(streams);
     if (child.get()) {
       child->writeIndex(streams);
@@ -2489,13 +2296,12 @@ namespace orc {
     return size;
   }
 
-  void ListColumnWriter::getColumnEncoding(
-                    std::vector<proto::ColumnEncoding>& encodings) const {
+  void ListColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(rleVersion));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
     if (child.get()) {
@@ -2503,8 +2309,7 @@ namespace orc {
     }
   }
 
-  void ListColumnWriter::getStripeStatistics(
-                    std::vector<proto::ColumnStatistics>& stats) const {
+  void ListColumnWriter::getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getStripeStatistics(stats);
     if (child.get()) {
       child->getStripeStatistics(stats);
@@ -2518,15 +2323,14 @@ namespace orc {
     }
   }
 
-  void ListColumnWriter::getFileStatistics(
-                    std::vector<proto::ColumnStatistics>& stats) const {
+  void ListColumnWriter::getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getFileStatistics(stats);
     if (child.get()) {
       child->getFileStatistics(stats);
     }
   }
 
-  void ListColumnWriter::mergeRowGroupStatsIntoStripeStats()  {
+  void ListColumnWriter::mergeRowGroupStatsIntoStripeStats() {
     ColumnWriter::mergeRowGroupStatsIntoStripeStats();
     if (child.get()) {
       child->mergeRowGroupStatsIntoStripeStats();
@@ -2559,29 +2363,22 @@ namespace orc {
   }
 
   class MapColumnWriter : public ColumnWriter {
-  public:
-    MapColumnWriter(const Type& type,
-                    const StreamsFactory& factory,
-                    const WriterOptions& options);
+   public:
+    MapColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
     ~MapColumnWriter() override;
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-      std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
-    virtual void getStripeStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
-    virtual void getFileStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
     virtual void mergeStripeStatsIntoFileStats() override;
 
@@ -2589,8 +2386,7 @@ namespace orc {
 
     virtual void createRowIndexEntry() override;
 
-    virtual void writeIndex(
-      std::vector<proto::Stream> &streams) const override;
+    virtual void writeIndex(std::vector<proto::Stream>& streams) const override;
 
     virtual void recordPosition() const override;
 
@@ -2598,24 +2394,19 @@ namespace orc {
 
     virtual void reset() override;
 
-  private:
+   private:
     std::unique_ptr<ColumnWriter> keyWriter;
     std::unique_ptr<ColumnWriter> elemWriter;
     std::unique_ptr<RleEncoder> lengthEncoder;
     RleVersion rleVersion;
   };
 
-  MapColumnWriter::MapColumnWriter(const Type& type,
-                                   const StreamsFactory& factory,
-                                   const WriterOptions& options) :
-                                     ColumnWriter(type, factory, options),
-                                     rleVersion(options.getRleVersion()){
+  MapColumnWriter::MapColumnWriter(const Type& type, const StreamsFactory& factory,
+                                   const WriterOptions& options)
+      : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) {
     std::unique_ptr<BufferedOutputStream> lengthStream =
-      factory.createStream(proto::Stream_Kind_LENGTH);
-    lengthEncoder = createRleEncoder(std::move(lengthStream),
-                                     false,
-                                     rleVersion,
-                                     memPool,
+        factory.createStream(proto::Stream_Kind_LENGTH);
+    lengthEncoder = createRleEncoder(std::move(lengthStream), false, rleVersion, memPool,
                                      options.getAlignedBitpacking());
 
     if (type.getSubtypeCount() > 0) {
@@ -2635,9 +2426,7 @@ namespace orc {
     // PASS
   }
 
-  void MapColumnWriter::add(ColumnVectorBatch& rowBatch,
-                            uint64_t offset,
-                            uint64_t numValues,
+  void MapColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                             const char* incomingMask) {
     MapVectorBatch* mapBatch = dynamic_cast<MapVectorBatch*>(&rowBatch);
     if (mapBatch == nullptr) {
@@ -2652,8 +2441,7 @@ namespace orc {
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
     int64_t* offsets = mapBatch->offsets.data() + offset;
-    const char* notNull = mapBatch->hasNulls ?
-                          mapBatch->notNull.data() + offset : nullptr;
+    const char* notNull = mapBatch->hasNulls ? mapBatch->notNull.data() + offset : nullptr;
 
     uint64_t elemOffset = static_cast<uint64_t>(offsets[0]);
     uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]);
@@ -2712,8 +2500,7 @@ namespace orc {
     }
   }
 
-  void MapColumnWriter::writeIndex(
-    std::vector<proto::Stream> &streams) const {
+  void MapColumnWriter::writeIndex(std::vector<proto::Stream>& streams) const {
     ColumnWriter::writeIndex(streams);
     if (keyWriter.get()) {
       keyWriter->writeIndex(streams);
@@ -2735,13 +2522,12 @@ namespace orc {
     return size;
   }
 
-  void MapColumnWriter::getColumnEncoding(
-                   std::vector<proto::ColumnEncoding>& encodings) const {
+  void MapColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(RleVersionMapper(rleVersion));
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
     if (keyWriter.get()) {
@@ -2752,8 +2538,7 @@ namespace orc {
     }
   }
 
-  void MapColumnWriter::getStripeStatistics(
-                   std::vector<proto::ColumnStatistics>& stats) const {
+  void MapColumnWriter::getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getStripeStatistics(stats);
     if (keyWriter.get()) {
       keyWriter->getStripeStatistics(stats);
@@ -2773,8 +2558,7 @@ namespace orc {
     }
   }
 
-  void MapColumnWriter::getFileStatistics(
-                   std::vector<proto::ColumnStatistics>& stats) const {
+  void MapColumnWriter::getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getFileStatistics(stats);
     if (keyWriter.get()) {
       keyWriter->getFileStatistics(stats);
@@ -2784,7 +2568,7 @@ namespace orc {
     }
   }
 
-  void MapColumnWriter::mergeRowGroupStatsIntoStripeStats()  {
+  void MapColumnWriter::mergeRowGroupStatsIntoStripeStats() {
     ColumnWriter::mergeRowGroupStatsIntoStripeStats();
     if (keyWriter.get()) {
       keyWriter->mergeRowGroupStatsIntoStripeStats();
@@ -2829,28 +2613,22 @@ namespace orc {
   }
 
   class UnionColumnWriter : public ColumnWriter {
-  public:
-    UnionColumnWriter(const Type& type,
-                      const StreamsFactory& factory,
+   public:
+    UnionColumnWriter(const Type& type, const StreamsFactory& factory,
                       const WriterOptions& options);
 
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                      const char* incomingMask) override;
 
     virtual void flush(std::vector<proto::Stream>& streams) override;
 
     virtual uint64_t getEstimatedSize() const override;
 
-    virtual void getColumnEncoding(
-      std::vector<proto::ColumnEncoding>& encodings) const override;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const override;
 
-    virtual void getStripeStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
-    virtual void getFileStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const override;
+    virtual void getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const override;
 
     virtual void mergeStripeStatsIntoFileStats() override;
 
@@ -2858,8 +2636,7 @@ namespace orc {
 
     virtual void createRowIndexEntry() override;
 
-    virtual void writeIndex(
-      std::vector<proto::Stream> &streams) const override;
+    virtual void writeIndex(std::vector<proto::Stream>& streams) const override;
 
     virtual void recordPosition() const override;
 
@@ -2867,24 +2644,20 @@ namespace orc {
 
     virtual void reset() override;
 
-  private:
+   private:
     std::unique_ptr<ByteRleEncoder> rleEncoder;
     std::vector<std::unique_ptr<ColumnWriter>> children;
   };
 
-  UnionColumnWriter::UnionColumnWriter(const Type& type,
-                                       const StreamsFactory& factory,
-                                       const WriterOptions& options) :
-    ColumnWriter(type, factory, options) {
-
+  UnionColumnWriter::UnionColumnWriter(const Type& type, const StreamsFactory& factory,
+                                       const WriterOptions& options)
+      : ColumnWriter(type, factory, options) {
     std::unique_ptr<BufferedOutputStream> dataStream =
-      factory.createStream(proto::Stream_Kind_DATA);
+        factory.createStream(proto::Stream_Kind_DATA);
     rleEncoder = createByteRleEncoder(std::move(dataStream));
 
     for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) {
-      children.push_back(buildWriter(*type.getSubtype(i),
-                                     factory,
-                                     options));
+      children.push_back(buildWriter(*type.getSubtype(i), factory, options));
     }
 
     if (enableIndex) {
@@ -2892,9 +2665,7 @@ namespace orc {
     }
   }
 
-  void UnionColumnWriter::add(ColumnVectorBatch& rowBatch,
-                              uint64_t offset,
-                              uint64_t numValues,
+  void UnionColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
                               const char* incomingMask) {
     UnionVectorBatch* unionBatch = dynamic_cast<UnionVectorBatch*>(&rowBatch);
     if (unionBatch == nullptr) {
@@ -2903,10 +2674,9 @@ namespace orc {
 
     ColumnWriter::add(rowBatch, offset, numValues, incomingMask);
 
-    const char* notNull = unionBatch->hasNulls ?
-                          unionBatch->notNull.data() + offset : nullptr;
-    unsigned char * tags = unionBatch->tags.data() + offset;
-    uint64_t * offsets = unionBatch->offsets.data() + offset;
+    const char* notNull = unionBatch->hasNulls ? unionBatch->notNull.data() + offset : nullptr;
+    unsigned char* tags = unionBatch->tags.data() + offset;
+    uint64_t* offsets = unionBatch->offsets.data() + offset;
 
     std::vector<int64_t> childOffset(children.size(), -1);
     std::vector<uint64_t> childLength(children.size(), 0);
@@ -2922,8 +2692,7 @@ namespace orc {
 
     for (uint32_t i = 0; i < children.size(); ++i) {
       if (childLength[i] > 0) {
-        children[i]->add(*unionBatch->children[i],
-                         static_cast<uint64_t>(childOffset[i]),
+        children[i]->add(*unionBatch->children[i], static_cast<uint64_t>(childOffset[i]),
                          childLength[i], nullptr);
       }
     }
@@ -2964,7 +2733,7 @@ namespace orc {
     }
   }
 
-  void UnionColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const {
+  void UnionColumnWriter::writeIndex(std::vector<proto::Stream>& streams) const {
     ColumnWriter::writeIndex(streams);
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->writeIndex(streams);
@@ -2980,13 +2749,12 @@ namespace orc {
     return size;
   }
 
-  void UnionColumnWriter::getColumnEncoding(
-                     std::vector<proto::ColumnEncoding>& encodings) const {
+  void UnionColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
     proto::ColumnEncoding encoding;
     encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
-    encoding.set_dictionarysize(0);
+    encoding.set_dictionary_size(0);
     if (enableBloomFilter) {
-      encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+      encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
     }
     encodings.push_back(encoding);
     for (uint32_t i = 0; i < children.size(); ++i) {
@@ -2994,8 +2762,7 @@ namespace orc {
     }
   }
 
-  void UnionColumnWriter::getStripeStatistics(
-                     std::vector<proto::ColumnStatistics>& stats) const {
+  void UnionColumnWriter::getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getStripeStatistics(stats);
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->getStripeStatistics(stats);
@@ -3009,15 +2776,14 @@ namespace orc {
     }
   }
 
-  void UnionColumnWriter::getFileStatistics(
-                     std::vector<proto::ColumnStatistics>& stats) const {
+  void UnionColumnWriter::getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const {
     ColumnWriter::getFileStatistics(stats);
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->getFileStatistics(stats);
     }
   }
 
-  void UnionColumnWriter::mergeRowGroupStatsIntoStripeStats()  {
+  void UnionColumnWriter::mergeRowGroupStatsIntoStripeStats() {
     ColumnWriter::mergeRowGroupStatsIntoStripeStats();
     for (uint32_t i = 0; i < children.size(); ++i) {
       children[i]->mergeRowGroupStatsIntoStripeStats();
@@ -3049,140 +2815,80 @@ namespace orc {
     }
   }
 
-  std::unique_ptr<ColumnWriter> buildWriter(
-                                            const Type& type,
-                                            const StreamsFactory& factory,
+  std::unique_ptr<ColumnWriter> buildWriter(const Type& type, const StreamsFactory& factory,
                                             const WriterOptions& options) {
     switch (static_cast<int64_t>(type.getKind())) {
       case STRUCT:
-        return std::unique_ptr<ColumnWriter>(
-          new StructColumnWriter(
-                                 type,
-                                 factory,
-                                 options));
+        return std::make_unique<StructColumnWriter>(type, factory, options);
+      case SHORT:
+        if (options.getUseTightNumericVector()) {
+          return std::make_unique<IntegerColumnWriter<ShortVectorBatch>>(type, factory, options);
+        }
+        return std::make_unique<IntegerColumnWriter<LongVectorBatch>>(type, factory, options);
       case INT:
+        if (options.getUseTightNumericVector()) {
+          return std::make_unique<IntegerColumnWriter<IntVectorBatch>>(type, factory, options);
+        }
+        return std::make_unique<IntegerColumnWriter<LongVectorBatch>>(type, factory, options);
       case LONG:
-      case SHORT:
-        return std::unique_ptr<ColumnWriter>(
-          new IntegerColumnWriter(
-                                  type,
-                                  factory,
-                                  options));
+        return std::make_unique<IntegerColumnWriter<LongVectorBatch>>(type, factory, options);
       case BYTE:
-        return std::unique_ptr<ColumnWriter>(
-          new ByteColumnWriter(
-                               type,
-                               factory,
-                               options));
+        if (options.getUseTightNumericVector()) {
+          return std::make_unique<ByteColumnWriter<ByteVectorBatch>>(type, factory, options);
+        }
+        return std::make_unique<ByteColumnWriter<LongVectorBatch>>(type, factory, options);
       case BOOLEAN:
-        return std::unique_ptr<ColumnWriter>(
-          new BooleanColumnWriter(
-                                  type,
-                                  factory,
-                                  options));
+        if (options.getUseTightNumericVector()) {
+          return std::make_unique<BooleanColumnWriter<ByteVectorBatch>>(type, factory, options);
+        }
+        return std::make_unique<BooleanColumnWriter<LongVectorBatch>>(type, factory, options);
       case DOUBLE:
-        return std::unique_ptr<ColumnWriter>(
-          new DoubleColumnWriter(
-                                 type,
-                                 factory,
-                                 options,
-                                 false));
+        return std::make_unique<FloatingColumnWriter<double, DoubleVectorBatch>>(type, factory,
+                                                                                 options, false);
       case FLOAT:
-        return std::unique_ptr<ColumnWriter>(
-          new DoubleColumnWriter(
-                                 type,
-                                 factory,
-                                 options,
-                                 true));
+        if (options.getUseTightNumericVector()) {
+          return std::make_unique<FloatingColumnWriter<float, FloatVectorBatch>>(type, factory,
+                                                                                 options, true);
+        }
+        return std::make_unique<FloatingColumnWriter<double, DoubleVectorBatch>>(type, factory,
+                                                                                 options, true);
       case BINARY:
-        return std::unique_ptr<ColumnWriter>(
-          new BinaryColumnWriter(
-                                 type,
-                                 factory,
-                                 options));
+        return std::make_unique<BinaryColumnWriter>(type, factory, options);
       case STRING:
-        return std::unique_ptr<ColumnWriter>(
-          new StringColumnWriter(
-                                 type,
-                                 factory,
-                                 options));
+        return std::make_unique<StringColumnWriter>(type, factory, options);
       case CHAR:
-        return std::unique_ptr<ColumnWriter>(
-          new CharColumnWriter(
-                               type,
-                               factory,
-                               options));
+        return std::make_unique<CharColumnWriter>(type, factory, options);
       case VARCHAR:
-        return std::unique_ptr<ColumnWriter>(
-          new VarCharColumnWriter(
-                                  type,
-                                  factory,
-                                  options));
+        return std::make_unique<VarCharColumnWriter>(type, factory, options);
       case DATE:
-        return std::unique_ptr<ColumnWriter>(
-          new DateColumnWriter(
-                               type,
-                               factory,
-                               options));
+        return std::make_unique<DateColumnWriter>(type, factory, options);
       case TIMESTAMP:
-        return std::unique_ptr<ColumnWriter>(
-          new TimestampColumnWriter(
-                                    type,
-                                    factory,
-                                    options,
-                                    false));
+        return std::make_unique<TimestampColumnWriter>(type, factory, options, false);
       case TIMESTAMP_INSTANT:
-        return std::unique_ptr<ColumnWriter>(
-          new TimestampColumnWriter(
-                                    type,
-                                    factory,
-                                    options,
-                                    true));
+        return std::make_unique<TimestampColumnWriter>(type, factory, options, true);
       case DECIMAL:
         if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_64) {
           if (options.getFileVersion() == FileVersion::UNSTABLE_PRE_2_0()) {
-            return std::unique_ptr<ColumnWriter>(
-              new Decimal64ColumnWriterV2(
-                                          type,
-                                          factory,
-                                          options));
+            return std::make_unique<Decimal64ColumnWriterV2>(type, factory, options);
           }
-          return std::unique_ptr<ColumnWriter>(
-            new Decimal64ColumnWriter(
-                                      type,
-                                      factory,
-                                      options));
+          return std::make_unique<Decimal64ColumnWriter>(type, factory, options);
         } else if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_128) {
-          return std::unique_ptr<ColumnWriter>(
-            new Decimal128ColumnWriter(
-                                       type,
-                                       factory,
-                                       options));
+          return std::make_unique<Decimal128ColumnWriter>(type, factory, options);
         } else {
-          throw NotImplementedYet("Decimal precision more than 38 is not "
-                                    "supported");
+          throw NotImplementedYet(
+              "Decimal precision more than 38 is not "
+              "supported");
         }
       case LIST:
-        return std::unique_ptr<ColumnWriter>(
-          new ListColumnWriter(
-                               type,
-                               factory,
-                               options));
+        return std::make_unique<ListColumnWriter>(type, factory, options);
       case MAP:
-        return std::unique_ptr<ColumnWriter>(
-          new MapColumnWriter(
-                              type,
-                              factory,
-                              options));
+        return std::make_unique<MapColumnWriter>(type, factory, options);
       case UNION:
-        return std::unique_ptr<ColumnWriter>(
-          new UnionColumnWriter(
-                                type,
-                                factory,
-                                options));
+        return std::make_unique<UnionColumnWriter>(type, factory, options);
       default:
-        throw NotImplementedYet("Type is not supported yet for creating "
-                                  "ColumnWriter.");
+        throw NotImplementedYet(
+            "Type is not supported yet for creating "
+            "ColumnWriter.");
     }
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/ColumnWriter.hh b/contrib/libs/apache/orc/c++/src/ColumnWriter.hh
index 20983774c4..f21ffd6f83 100644
--- a/contrib/libs/apache/orc/c++/src/ColumnWriter.hh
+++ b/contrib/libs/apache/orc/c++/src/ColumnWriter.hh
@@ -24,15 +24,15 @@
 #include "BloomFilter.hh"
 #include "ByteRLE.hh"
 #include "Compression.hh"
-#include "orc/Exceptions.hh"
 #include "Statistics.hh"
+#include "orc/Exceptions.hh"
 
 #include "wrap/orc-proto-wrapper.hh"
 
 namespace orc {
 
   class StreamsFactory {
-  public:
+   public:
     virtual ~StreamsFactory();
 
     /**
@@ -40,29 +40,26 @@ namespace orc {
      * @param kind the kind of the stream
      * @return the buffered output stream
      */
-    virtual std::unique_ptr<BufferedOutputStream>
-                    createStream(proto::Stream_Kind kind) const = 0;
+    virtual std::unique_ptr<BufferedOutputStream> createStream(proto::Stream_Kind kind) const = 0;
   };
 
-  std::unique_ptr<StreamsFactory> createStreamsFactory(
-                                        const WriterOptions& options,
-                                        OutputStream * outStream);
+  std::unique_ptr<StreamsFactory> createStreamsFactory(const WriterOptions& options,
+                                                       OutputStream* outStream);
 
   /**
    * record stream positions for row index
    */
   class RowIndexPositionRecorder : public PositionRecorder {
-  public:
+   public:
     virtual ~RowIndexPositionRecorder() override;
 
-    RowIndexPositionRecorder(proto::RowIndexEntry& entry):
-      rowIndexEntry(entry) {}
+    RowIndexPositionRecorder(proto::RowIndexEntry& entry) : rowIndexEntry(entry) {}
 
     virtual void add(uint64_t pos) override {
       rowIndexEntry.add_positions(pos);
     }
 
-  private:
+   private:
     proto::RowIndexEntry& rowIndexEntry;
   };
 
@@ -70,7 +67,7 @@ namespace orc {
    * The interface for writing ORC data types.
    */
   class ColumnWriter {
-  protected:
+   protected:
     std::unique_ptr<ByteRleEncoder> notNullEncoder;
     uint64_t columnId;
     std::unique_ptr<MutableColumnStatistics> colIndexStatistics;
@@ -88,9 +85,8 @@ namespace orc {
     std::unique_ptr<BloomFilterImpl> bloomFilter;
     std::unique_ptr<proto::BloomFilterIndex> bloomFilterIndex;
 
-  public:
-    ColumnWriter(const Type& type, const StreamsFactory& factory,
-                 const WriterOptions& options);
+   public:
+    ColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
 
     virtual ~ColumnWriter();
 
@@ -103,10 +99,8 @@ namespace orc {
      *                     a mask (with at least numValues bytes) for which
      *                     values to write.
      */
-    virtual void add(ColumnVectorBatch& rowBatch,
-                     uint64_t offset,
-                     uint64_t numValues,
-                     const char * incomingMask);
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues,
+                     const char* incomingMask);
     /**
      * Flush column writer output streams.
      * @param streams vector to store streams generated by flush()
@@ -123,22 +117,19 @@ namespace orc {
      * Get the encoding used by the writer for this column.
      * @param encodings vector to store the returned ColumnEncoding info
      */
-    virtual void getColumnEncoding(
-      std::vector<proto::ColumnEncoding>& encodings) const = 0;
+    virtual void getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const = 0;
 
     /**
      * Get the stripe statistics for this column.
      * @param stats vector to store the returned stripe statistics
      */
-    virtual void getStripeStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const;
+    virtual void getStripeStatistics(std::vector<proto::ColumnStatistics>& stats) const;
 
     /**
      * Get the file statistics for this column.
      * @param stats vector to store the returned file statistics
      */
-    virtual void getFileStatistics(
-      std::vector<proto::ColumnStatistics>& stats) const;
+    virtual void getFileStatistics(std::vector<proto::ColumnStatistics>& stats) const;
 
     /**
      * Merge index stats into stripe stats and reset index stats.
@@ -167,7 +158,7 @@ namespace orc {
      * Write row index streams for this column.
      * @param streams output list of ROW_INDEX streams
      */
-    virtual void writeIndex(std::vector<proto::Stream> &streams) const;
+    virtual void writeIndex(std::vector<proto::Stream>& streams) const;
 
     /**
      * Record positions for index.
@@ -188,22 +179,21 @@ namespace orc {
      */
     virtual void writeDictionary();
 
-  protected:
+   protected:
     /**
      * Utility function to translate ColumnStatistics into protobuf form and
      * add it to output list.
      * @param statsList output list for protobuf stats
      * @param stats ColumnStatistics to be transformed and added
      */
-     void getProtoBufStatistics(
-                                std::vector<proto::ColumnStatistics>& statsList,
-                                const MutableColumnStatistics* stats) const {
-       proto::ColumnStatistics pbStats;
-       stats->toProtoBuf(pbStats);
-       statsList.push_back(pbStats);
-     }
+    void getProtoBufStatistics(std::vector<proto::ColumnStatistics>& statsList,
+                               const MutableColumnStatistics* stats) const {
+      proto::ColumnStatistics pbStats;
+      stats->toProtoBuf(pbStats);
+      statsList.push_back(pbStats);
+    }
 
-  protected:
+   protected:
     MemoryPool& memPool;
     std::unique_ptr<BufferedOutputStream> indexStream;
     std::unique_ptr<BufferedOutputStream> bloomFilterStream;
@@ -213,10 +203,8 @@ namespace orc {
   /**
    * Create a writer for the given type.
    */
-  std::unique_ptr<ColumnWriter> buildWriter(
-                                            const Type& type,
-                                            const StreamsFactory& factory,
+  std::unique_ptr<ColumnWriter> buildWriter(const Type& type, const StreamsFactory& factory,
                                             const WriterOptions& options);
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/Common.cc b/contrib/libs/apache/orc/c++/src/Common.cc
index 477bfd3b4c..cf2ff27ef1 100644
--- a/contrib/libs/apache/orc/c++/src/Common.cc
+++ b/contrib/libs/apache/orc/c++/src/Common.cc
@@ -82,6 +82,8 @@ namespace orc {
         return "Scritchley Go";
       case TRINO_WRITER:
         return "Trino";
+      case CUDF_WRITER:
+        return "CUDF";
       default: {
         std::ostringstream buffer;
         buffer << "Unknown(" << id << ")";
@@ -138,14 +140,14 @@ namespace orc {
     ss << majorVersion << '.' << minorVersion;
     return ss.str();
   }
-  
-  const FileVersion& FileVersion::v_0_11(){
-    static FileVersion version(0,11);
+
+  const FileVersion& FileVersion::v_0_11() {
+    static FileVersion version(0, 11);
     return version;
   }
-  
-  const FileVersion& FileVersion::v_0_12(){
-    static FileVersion version(0,12);
+
+  const FileVersion& FileVersion::v_0_12() {
+    static FileVersion version(0, 12);
     return version;
   }
 
@@ -156,9 +158,9 @@ namespace orc {
    * without providing any forward or backward compatibility.
    *
    * When 2.0 is released, this version identifier will be completely removed.
-  */
+   */
   const FileVersion& FileVersion::UNSTABLE_PRE_2_0() {
     static FileVersion version(1, 9999);
     return version;
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Compression.cc b/contrib/libs/apache/orc/c++/src/Compression.cc
index ea10171507..94be774ab4 100644
--- a/contrib/libs/apache/orc/c++/src/Compression.cc
+++ b/contrib/libs/apache/orc/c++/src/Compression.cc
@@ -16,13 +16,15 @@
  * limitations under the License.
  */
 
-#include "Adaptor.hh"
 #include "Compression.hh"
-#include "orc/Exceptions.hh"
+#include "Adaptor.hh"
 #include "LzoDecompressor.hh"
+#include "Utils.hh"
 #include "lz4.h"
+#include "orc/Exceptions.hh"
 
 #include <algorithm>
+#include <array>
 #include <iomanip>
 #include <iostream>
 #include <sstream>
@@ -47,28 +49,30 @@
 
 namespace orc {
 
-  class CompressionStreamBase: public BufferedOutputStream {
-  public:
-    CompressionStreamBase(OutputStream * outStream,
-                          int compressionLevel,
-                          uint64_t capacity,
-                          uint64_t blockSize,
-                          MemoryPool& pool);
+  class CompressionStreamBase : public BufferedOutputStream {
+   public:
+    CompressionStreamBase(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                          uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics);
 
-    virtual bool Next(void** data, int*size) override = 0;
+    virtual bool Next(void** data, int* size) override = 0;
     virtual void BackUp(int count) override;
 
     virtual std::string getName() const override = 0;
     virtual uint64_t flush() override;
+    virtual void suppress() override;
 
-    virtual bool isCompressed() const override { return true; }
+    virtual bool isCompressed() const override {
+      return true;
+    }
     virtual uint64_t getSize() const override;
 
-  protected:
-    void writeHeader(char * buffer, size_t compressedSize, bool original) {
-      buffer[0] = static_cast<char>((compressedSize << 1) + (original ? 1 : 0));
-      buffer[1] = static_cast<char>(compressedSize >> 7);
-      buffer[2] = static_cast<char>(compressedSize >> 15);
+   protected:
+    void writeData(const unsigned char* data, int size);
+
+    void writeHeader(size_t compressedSize, bool original) {
+      *header[0] = static_cast<char>((compressedSize << 1) + (original ? 1 : 0));
+      *header[1] = static_cast<char>(compressedSize >> 7);
+      *header[2] = static_cast<char>(compressedSize >> 15);
     }
 
     // ensure enough room for compression block header
@@ -81,7 +85,7 @@ namespace orc {
     int level;
 
     // Compressed data output buffer
-    char * outputBuffer;
+    char* outputBuffer;
 
     // Size for compressionBuffer
     int bufferSize;
@@ -91,24 +95,24 @@ namespace orc {
 
     // Compress output buffer size
     int outputSize;
+
+    // Compression block header pointer array
+    static const uint32_t HEADER_SIZE = 3;
+    std::array<char*, HEADER_SIZE> header;
   };
 
-  CompressionStreamBase::CompressionStreamBase(OutputStream * outStream,
-                                               int compressionLevel,
-                                               uint64_t capacity,
-                                               uint64_t blockSize,
-                                               MemoryPool& pool) :
-                                                BufferedOutputStream(pool,
-                                                                     outStream,
-                                                                     capacity,
-                                                                     blockSize),
-                                                rawInputBuffer(pool, blockSize),
-                                                level(compressionLevel),
-                                                outputBuffer(nullptr),
-                                                bufferSize(0),
-                                                outputPosition(0),
-                                                outputSize(0) {
-    // PASS
+  CompressionStreamBase::CompressionStreamBase(OutputStream* outStream, int compressionLevel,
+                                               uint64_t capacity, uint64_t blockSize,
+                                               MemoryPool& pool, WriterMetrics* metrics)
+      : BufferedOutputStream(pool, outStream, capacity, blockSize, metrics),
+        rawInputBuffer(pool, blockSize),
+        level(compressionLevel),
+        outputBuffer(nullptr),
+        bufferSize(0),
+        outputPosition(0),
+        outputSize(0) {
+    // init header pointer array
+    header.fill(nullptr);
   }
 
   void CompressionStreamBase::BackUp(int count) {
@@ -119,7 +123,7 @@ namespace orc {
   }
 
   uint64_t CompressionStreamBase::flush() {
-    void * data;
+    void* data;
     int size;
     if (!Next(&data, &size)) {
       throw std::runtime_error("Failed to flush compression buffer.");
@@ -129,79 +133,91 @@ namespace orc {
     return BufferedOutputStream::flush();
   }
 
+  void CompressionStreamBase::suppress() {
+    outputBuffer = nullptr;
+    bufferSize = outputPosition = outputSize = 0;
+    BufferedOutputStream::suppress();
+  }
+
   uint64_t CompressionStreamBase::getSize() const {
-    return BufferedOutputStream::getSize() -
-           static_cast<uint64_t>(outputSize - outputPosition);
+    return BufferedOutputStream::getSize() - static_cast<uint64_t>(outputSize - outputPosition);
+  }
+
+  // write the data content into outputBuffer
+  void CompressionStreamBase::writeData(const unsigned char* data, int size) {
+    int offset = 0;
+    while (offset < size) {
+      if (outputPosition == outputSize) {
+        if (!BufferedOutputStream::Next(reinterpret_cast<void**>(&outputBuffer), &outputSize)) {
+          throw std::runtime_error("Failed to get next output buffer from output stream.");
+        }
+        outputPosition = 0;
+      } else if (outputPosition > outputSize) {
+        // for safety this will unlikely happen
+        throw std::logic_error("Write to an out-of-bound place during compression!");
+      }
+      int currentSize = std::min(outputSize - outputPosition, size - offset);
+      memcpy(outputBuffer + outputPosition, data + offset, static_cast<size_t>(currentSize));
+      offset += currentSize;
+      outputPosition += currentSize;
+    }
   }
 
   void CompressionStreamBase::ensureHeader() {
     // adjust 3 bytes for the compression header
-    if (outputPosition + 3 >= outputSize) {
-      int newPosition = outputPosition + 3 - outputSize;
-      if (!BufferedOutputStream::Next(
-        reinterpret_cast<void **>(&outputBuffer),
-        &outputSize)) {
-        throw std::runtime_error(
-          "Failed to get next output buffer from output stream.");
+    for (uint32_t i = 0; i < HEADER_SIZE; ++i) {
+      if (outputPosition >= outputSize) {
+        if (!BufferedOutputStream::Next(reinterpret_cast<void**>(&outputBuffer), &outputSize)) {
+          throw std::runtime_error("Failed to get next output buffer from output stream.");
+        }
+        outputPosition = 0;
       }
-      outputPosition = newPosition;
-    } else {
-      outputPosition += 3;
+      header[i] = outputBuffer + outputPosition;
+      ++outputPosition;
     }
   }
 
   /**
    * Streaming compression base class
    */
-  class CompressionStream: public CompressionStreamBase {
-  public:
-    CompressionStream(OutputStream * outStream,
-                          int compressionLevel,
-                          uint64_t capacity,
-                          uint64_t blockSize,
-                          MemoryPool& pool);
-
-    virtual bool Next(void** data, int*size) override;
+  class CompressionStream : public CompressionStreamBase {
+   public:
+    CompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                      uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics);
+
+    virtual bool Next(void** data, int* size) override;
     virtual std::string getName() const override = 0;
 
-  protected:
+   protected:
     // return total compressed size
     virtual uint64_t doStreamingCompression() = 0;
   };
 
-  CompressionStream::CompressionStream(OutputStream * outStream,
-                                       int compressionLevel,
-                                       uint64_t capacity,
-                                       uint64_t blockSize,
-                                       MemoryPool& pool) :
-                                         CompressionStreamBase(outStream,
-                                                               compressionLevel,
-                                                               capacity,
-                                                               blockSize,
-                                                               pool) {
+  CompressionStream::CompressionStream(OutputStream* outStream, int compressionLevel,
+                                       uint64_t capacity, uint64_t blockSize, MemoryPool& pool,
+                                       WriterMetrics* metrics)
+      : CompressionStreamBase(outStream, compressionLevel, capacity, blockSize, pool, metrics) {
     // PASS
   }
 
-  bool CompressionStream::Next(void** data, int*size) {
+  bool CompressionStream::Next(void** data, int* size) {
     if (bufferSize != 0) {
       ensureHeader();
 
+      uint64_t preSize = getSize();
       uint64_t totalCompressedSize = doStreamingCompression();
-
-      char * header = outputBuffer + outputPosition - totalCompressedSize - 3;
       if (totalCompressedSize >= static_cast<unsigned long>(bufferSize)) {
-        writeHeader(header, static_cast<size_t>(bufferSize), true);
-        memcpy(
-          header + 3,
-          rawInputBuffer.data(),
-          static_cast<size_t>(bufferSize));
-
-        int backup = static_cast<int>(totalCompressedSize) - bufferSize;
-        BufferedOutputStream::BackUp(backup);
-        outputPosition -= backup;
-        outputSize -= backup;
+        writeHeader(static_cast<size_t>(bufferSize), true);
+        // reset output buffer
+        outputBuffer = nullptr;
+        outputPosition = outputSize = 0;
+        uint64_t backup = getSize() - preSize;
+        BufferedOutputStream::BackUp(static_cast<int>(backup));
+
+        // copy raw input buffer into block buffer
+        writeData(rawInputBuffer.data(), bufferSize);
       } else {
-        writeHeader(header, totalCompressedSize, false);
+        writeHeader(totalCompressedSize, false);
       }
     }
 
@@ -212,13 +228,10 @@ namespace orc {
     return true;
   }
 
-  class ZlibCompressionStream: public CompressionStream {
-  public:
-    ZlibCompressionStream(OutputStream * outStream,
-                          int compressionLevel,
-                          uint64_t capacity,
-                          uint64_t blockSize,
-                          MemoryPool& pool);
+  class ZlibCompressionStream : public CompressionStream {
+   public:
+    ZlibCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                          uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics);
 
     virtual ~ZlibCompressionStream() override {
       end();
@@ -226,26 +239,19 @@ namespace orc {
 
     virtual std::string getName() const override;
 
-  protected:
+   protected:
     virtual uint64_t doStreamingCompression() override;
 
-  private:
+   private:
     void init();
     void end();
     z_stream strm;
   };
 
-  ZlibCompressionStream::ZlibCompressionStream(
-                        OutputStream * outStream,
-                        int compressionLevel,
-                        uint64_t capacity,
-                        uint64_t blockSize,
-                        MemoryPool& pool)
-                        : CompressionStream(outStream,
-                                            compressionLevel,
-                                            capacity,
-                                            blockSize,
-                                            pool) {
+  ZlibCompressionStream::ZlibCompressionStream(OutputStream* outStream, int compressionLevel,
+                                               uint64_t capacity, uint64_t blockSize,
+                                               MemoryPool& pool, WriterMetrics* metrics)
+      : CompressionStream(outStream, compressionLevel, capacity, blockSize, pool, metrics) {
     init();
   }
 
@@ -259,18 +265,13 @@ namespace orc {
 
     do {
       if (outputPosition >= outputSize) {
-        if (!BufferedOutputStream::Next(
-          reinterpret_cast<void **>(&outputBuffer),
-          &outputSize)) {
-          throw std::runtime_error(
-            "Failed to get next output buffer from output stream.");
+        if (!BufferedOutputStream::Next(reinterpret_cast<void**>(&outputBuffer), &outputSize)) {
+          throw std::runtime_error("Failed to get next output buffer from output stream.");
         }
         outputPosition = 0;
       }
-      strm.next_out = reinterpret_cast<unsigned char *>
-      (outputBuffer + outputPosition);
-      strm.avail_out = static_cast<unsigned int>
-      (outputSize - outputPosition);
+      strm.next_out = reinterpret_cast<unsigned char*>(outputBuffer + outputPosition);
+      strm.avail_out = static_cast<unsigned int>(outputSize - outputPosition);
 
       int ret = deflate(&strm, Z_FINISH);
       outputPosition = outputSize - static_cast<int>(strm.avail_out);
@@ -291,7 +292,7 @@ namespace orc {
     return "ZlibCompressionStream";
   }
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
   DIAGNOSTIC_IGNORE("-Wold-style-cast")
@@ -303,8 +304,7 @@ DIAGNOSTIC_PUSH
     strm.opaque = nullptr;
     strm.next_in = nullptr;
 
-    if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)
-        != Z_OK) {
+    if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
       throw std::runtime_error("Error while calling deflateInit2() for zlib.");
     }
   }
@@ -313,42 +313,46 @@ DIAGNOSTIC_PUSH
     (void)deflateEnd(&strm);
   }
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_PUSH
 
-  enum DecompressState { DECOMPRESS_HEADER,
-                         DECOMPRESS_START,
-                         DECOMPRESS_CONTINUE,
-                         DECOMPRESS_ORIGINAL,
-                         DECOMPRESS_EOF};
+  enum DecompressState {
+    DECOMPRESS_HEADER,
+    DECOMPRESS_START,
+    DECOMPRESS_CONTINUE,
+    DECOMPRESS_ORIGINAL,
+    DECOMPRESS_EOF
+  };
 
   std::string decompressStateToString(DecompressState state) {
     switch (state) {
-      case DECOMPRESS_HEADER: return "DECOMPRESS_HEADER";
-      case DECOMPRESS_START: return "DECOMPRESS_START";
-      case DECOMPRESS_CONTINUE: return "DECOMPRESS_CONTINUE";
-      case DECOMPRESS_ORIGINAL: return "DECOMPRESS_ORIGINAL";
-      case DECOMPRESS_EOF: return "DECOMPRESS_EOF";
+      case DECOMPRESS_HEADER:
+        return "DECOMPRESS_HEADER";
+      case DECOMPRESS_START:
+        return "DECOMPRESS_START";
+      case DECOMPRESS_CONTINUE:
+        return "DECOMPRESS_CONTINUE";
+      case DECOMPRESS_ORIGINAL:
+        return "DECOMPRESS_ORIGINAL";
+      case DECOMPRESS_EOF:
+        return "DECOMPRESS_EOF";
     }
     return "unknown";
   }
 
   class DecompressionStream : public SeekableInputStream {
-  public:
-    DecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                        size_t bufferSize,
-                        MemoryPool& pool);
+   public:
+    DecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t bufferSize,
+                        MemoryPool& pool, ReaderMetrics* metrics);
     virtual ~DecompressionStream() override {}
-    virtual bool Next(const void** data, int*size) override;
+    virtual bool Next(const void** data, int* size) override;
     virtual void BackUp(int count) override;
     virtual bool Skip(int count) override;
     virtual int64_t ByteCount() const override;
     virtual void seek(PositionProvider& position) override;
     virtual std::string getName() const override = 0;
 
-  protected:
-    virtual void NextDecompress(const void** data,
-                                int*size,
-                                size_t availableSize) = 0;
+   protected:
+    virtual void NextDecompress(const void** data, int* size, size_t availableSize) = 0;
 
     std::string getStreamName() const;
     void readBuffer(bool failOnEof);
@@ -366,8 +370,8 @@ DIAGNOSTIC_PUSH
 
     // The starting and current position of the buffer for the uncompressed
     // data. It either points to the data buffer or the underlying input stream.
-    const char *outputBufferStart;
-    const char *outputBuffer;
+    const char* outputBufferStart;
+    const char* outputBuffer;
     size_t outputBufferLength;
     // The uncompressed buffer length. For compressed chunk, it's the original
     // (ie. the overall) and the actual length of the decompressed data.
@@ -379,9 +383,9 @@ DIAGNOSTIC_PUSH
     size_t remainingLength;
 
     // the last buffer returned from the input
-    const char *inputBufferStart;
-    const char *inputBuffer;
-    const char *inputBufferEnd;
+    const char* inputBufferStart;
+    const char* inputBuffer;
+    const char* inputBufferEnd;
 
     // Variables for saving the position of the header and the start of the
     // buffer. Used when we have to seek a position.
@@ -390,37 +394,38 @@ DIAGNOSTIC_PUSH
 
     // roughly the number of bytes returned
     off_t bytesReturned;
+
+    ReaderMetrics* metrics;
   };
 
-  DecompressionStream::DecompressionStream(
-      std::unique_ptr<SeekableInputStream> inStream,
-      size_t bufferSize,
-      MemoryPool& _pool
-      ) : pool(_pool),
-          input(std::move(inStream)),
-          outputDataBuffer(pool, bufferSize),
-          state(DECOMPRESS_HEADER),
-          outputBufferStart(nullptr),
-          outputBuffer(nullptr),
-          outputBufferLength(0),
-          uncompressedBufferLength(0),
-          remainingLength(0),
-          inputBufferStart(nullptr),
-          inputBuffer(nullptr),
-          inputBufferEnd(nullptr),
-          headerPosition(0),
-          inputBufferStartPosition(0),
-          bytesReturned(0)  {
-  }
+  DecompressionStream::DecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
+                                           size_t bufferSize, MemoryPool& _pool,
+                                           ReaderMetrics* _metrics)
+      : pool(_pool),
+        input(std::move(inStream)),
+        outputDataBuffer(pool, bufferSize),
+        state(DECOMPRESS_HEADER),
+        outputBufferStart(nullptr),
+        outputBuffer(nullptr),
+        outputBufferLength(0),
+        uncompressedBufferLength(0),
+        remainingLength(0),
+        inputBufferStart(nullptr),
+        inputBuffer(nullptr),
+        inputBufferEnd(nullptr),
+        headerPosition(0),
+        inputBufferStartPosition(0),
+        bytesReturned(0),
+        metrics(_metrics) {}
 
   std::string DecompressionStream::getStreamName() const {
     return input->getName();
   }
 
   void DecompressionStream::readBuffer(bool failOnEof) {
+    SCOPED_MINUS_STOPWATCH(metrics, DecompressionLatencyUs);
     int length;
-    if (!input->Next(reinterpret_cast<const void**>(&inputBuffer),
-                      &length)) {
+    if (!input->Next(reinterpret_cast<const void**>(&inputBuffer), &length)) {
       if (failOnEof) {
         throw ParseError("Read past EOF in DecompressionStream::readBuffer");
       }
@@ -430,8 +435,7 @@ DIAGNOSTIC_PUSH
       inputBufferStart = nullptr;
     } else {
       inputBufferEnd = inputBuffer + length;
-      inputBufferStartPosition
-        = static_cast<size_t>(input->ByteCount() - length);
+      inputBufferStartPosition = static_cast<size_t>(input->ByteCount() - length);
       inputBufferStart = inputBuffer;
     }
   }
@@ -462,7 +466,8 @@ DIAGNOSTIC_PUSH
     }
   }
 
-  bool DecompressionStream::Next(const void** data, int*size) {
+  bool DecompressionStream::Next(const void** data, int* size) {
+    SCOPED_STOPWATCH(metrics, DecompressionLatencyUs, DecompressionCall);
     // If we are starting a new header, we will have to store its positions
     // after decompressing.
     bool saveBufferPositions = false;
@@ -478,8 +483,8 @@ DIAGNOSTIC_PUSH
     if (state == DECOMPRESS_HEADER || remainingLength == 0) {
       readHeader();
       // Here we already read the three bytes of the header.
-      headerPosition = inputBufferStartPosition
-        + static_cast<size_t>(inputBuffer - inputBufferStart) - 3;
+      headerPosition =
+          inputBufferStartPosition + static_cast<size_t>(inputBuffer - inputBufferStart) - 3;
       saveBufferPositions = true;
     }
     if (state == DECOMPRESS_EOF) {
@@ -489,8 +494,7 @@ DIAGNOSTIC_PUSH
       readBuffer(true);
     }
     size_t availableSize =
-      std::min(static_cast<size_t>(inputBufferEnd - inputBuffer),
-               remainingLength);
+        std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), remainingLength);
     if (state == DECOMPRESS_ORIGINAL) {
       *data = inputBuffer;
       *size = static_cast<int>(availableSize);
@@ -501,8 +505,9 @@ DIAGNOSTIC_PUSH
     } else if (state == DECOMPRESS_START) {
       NextDecompress(data, size, availableSize);
     } else {
-      throw std::logic_error("Unknown compression state in "
-                             "DecompressionStream::Next");
+      throw std::logic_error(
+          "Unknown compression state in "
+          "DecompressionStream::Next");
     }
     bytesReturned += static_cast<off_t>(*size);
     if (saveBufferPositions) {
@@ -530,7 +535,7 @@ DIAGNOSTIC_PUSH
     // this is a stupid implementation for now.
     // should skip entire blocks without decompressing
     while (count > 0) {
-      const void *ptr;
+      const void* ptr;
       int len;
       if (!Next(&ptr, &len)) {
         return false;
@@ -560,10 +565,10 @@ DIAGNOSTIC_PUSH
     // Case 1: the seeked position is in the current chunk and it's buffered and
     // decompressed/uncompressed. Note that after the headerPosition comes the 3 bytes of
     // the header.
-    if (headerPosition == seekedHeaderPosition
-        && inputBufferStartPosition <= headerPosition + 3 && inputBufferStart) {
-      position.next(); // Skip the input level position, i.e. seekedHeaderPosition.
-      size_t posInChunk = position.next(); // Chunk level position.
+    if (headerPosition == seekedHeaderPosition && inputBufferStartPosition <= headerPosition + 3 &&
+        inputBufferStart) {
+      position.next();  // Skip the input level position, i.e. seekedHeaderPosition.
+      size_t posInChunk = position.next();  // Chunk level position.
       // Case 1.a: The position is in the decompressed/uncompressed buffer. Here we only
       // need to set the output buffer's pointer to the seeked position.
       if (uncompressedBufferLength >= posInChunk) {
@@ -575,9 +580,8 @@ DIAGNOSTIC_PUSH
       // Skip bytes to seek.
       if (!Skip(static_cast<int>(posInChunk - uncompressedBufferLength))) {
         std::ostringstream ss;
-        ss << "Bad seek to (chunkHeader=" << seekedHeaderPosition << ", posInChunk="
-           << posInChunk << ") in " << getName() << ". DecompressionState: "
-           << decompressStateToString(state);
+        ss << "Bad seek to (chunkHeader=" << seekedHeaderPosition << ", posInChunk=" << posInChunk
+           << ") in " << getName() << ". DecompressionState: " << decompressStateToString(state);
         throw ParseError(ss.str());
       }
       return;
@@ -592,15 +596,14 @@ DIAGNOSTIC_PUSH
       // Case 2: The input is buffered, but not yet decompressed. No need to
       // force re-reading the inputBuffer, we just have to move it to the
       // seeked position.
-      position.next(); // Skip the input level position.
-      inputBuffer
-        = inputBufferStart + (seekedHeaderPosition - inputBufferStartPosition);
+      position.next();  // Skip the input level position.
+      inputBuffer = inputBufferStart + (seekedHeaderPosition - inputBufferStartPosition);
     } else {
       // Case 3: The seeked position is not in the input buffer, here we are
       // forcing to read it.
       inputBuffer = nullptr;
       inputBufferEnd = nullptr;
-      input->seek(position); // Actually use the input level position.
+      input->seek(position);  // Actually use the input level position.
     }
     bytesReturned = static_cast<off_t>(input->ByteCount());
     if (!Skip(static_cast<int>(position.next()))) {
@@ -609,33 +612,29 @@ DIAGNOSTIC_PUSH
   }
 
   class ZlibDecompressionStream : public DecompressionStream {
-  public:
-    ZlibDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                            size_t blockSize,
-                            MemoryPool& pool);
+   public:
+    ZlibDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                            MemoryPool& pool, ReaderMetrics* metrics);
     virtual ~ZlibDecompressionStream() override;
     virtual std::string getName() const override;
 
-  protected:
-    virtual void NextDecompress(const void** data,
-                                int* size,
-                                size_t availableSize) override;
-  private:
+   protected:
+    virtual void NextDecompress(const void** data, int* size, size_t availableSize) override;
+
+   private:
     z_stream zstream;
   };
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
   DIAGNOSTIC_IGNORE("-Wold-style-cast")
 #endif
 
-  ZlibDecompressionStream::ZlibDecompressionStream
-                   (std::unique_ptr<SeekableInputStream> inStream,
-                    size_t bufferSize,
-                    MemoryPool& _pool
-                    ): DecompressionStream
-                          (std::move(inStream), bufferSize, _pool) {
+  ZlibDecompressionStream::ZlibDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
+                                                   size_t bufferSize, MemoryPool& _pool,
+                                                   ReaderMetrics* _metrics)
+      : DecompressionStream(std::move(inStream), bufferSize, _pool, _metrics) {
     zstream.next_in = nullptr;
     zstream.avail_in = 0;
     zstream.zalloc = nullptr;
@@ -645,20 +644,20 @@ DIAGNOSTIC_PUSH
     zstream.avail_out = static_cast<uInt>(outputDataBuffer.capacity());
     int64_t result = inflateInit2(&zstream, -15);
     switch (result) {
-    case Z_OK:
-      break;
-    case Z_MEM_ERROR:
-      throw std::logic_error("Memory error from inflateInit2");
-    case Z_VERSION_ERROR:
-      throw std::logic_error("Version error from inflateInit2");
-    case Z_STREAM_ERROR:
-      throw std::logic_error("Stream error from inflateInit2");
-    default:
-      throw std::logic_error("Unknown error from inflateInit2");
+      case Z_OK:
+        break;
+      case Z_MEM_ERROR:
+        throw std::logic_error("Memory error from inflateInit2");
+      case Z_VERSION_ERROR:
+        throw std::logic_error("Version error from inflateInit2");
+      case Z_STREAM_ERROR:
+        throw std::logic_error("Stream error from inflateInit2");
+      default:
+        throw std::logic_error("Unknown error from inflateInit2");
     }
   }
 
-DIAGNOSTIC_POP
+  DIAGNOSTIC_POP
 
   ZlibDecompressionStream::~ZlibDecompressionStream() {
     int64_t result = inflateEnd(&zstream);
@@ -668,49 +667,48 @@ DIAGNOSTIC_POP
     }
   }
 
-  void ZlibDecompressionStream::NextDecompress(const void** data, int* size,
-      size_t availableSize) {
-    zstream.next_in =
-      reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer));
+  void ZlibDecompressionStream::NextDecompress(const void** data, int* size, size_t availableSize) {
+    zstream.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer));
     zstream.avail_in = static_cast<uInt>(availableSize);
     outputBuffer = outputDataBuffer.data();
-    zstream.next_out =
-      reinterpret_cast<Bytef*>(const_cast<char*>(outputBuffer));
+    zstream.next_out = reinterpret_cast<Bytef*>(const_cast<char*>(outputBuffer));
     zstream.avail_out = static_cast<uInt>(outputDataBuffer.capacity());
     if (inflateReset(&zstream) != Z_OK) {
-      throw std::logic_error("Bad inflateReset in "
-                              "ZlibDecompressionStream::NextDecompress");
+      throw std::logic_error(
+          "Bad inflateReset in "
+          "ZlibDecompressionStream::NextDecompress");
     }
     int64_t result;
     do {
-      result = inflate(&zstream, availableSize == remainingLength ? Z_FINISH :
-                        Z_SYNC_FLUSH);
+      result = inflate(&zstream, availableSize == remainingLength ? Z_FINISH : Z_SYNC_FLUSH);
       switch (result) {
-      case Z_OK:
-        remainingLength -= availableSize;
-        inputBuffer += availableSize;
-        readBuffer(true);
-        availableSize =
-          std::min(static_cast<size_t>(inputBufferEnd - inputBuffer),
-                    remainingLength);
-        zstream.next_in =
-          reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer));
-        zstream.avail_in = static_cast<uInt>(availableSize);
-        break;
-      case Z_STREAM_END:
-        break;
-      case Z_BUF_ERROR:
-        throw std::logic_error("Buffer error in "
-                                "ZlibDecompressionStream::NextDecompress");
-      case Z_DATA_ERROR:
-        throw std::logic_error("Data error in "
-                                "ZlibDecompressionStream::NextDecompress");
-      case Z_STREAM_ERROR:
-        throw std::logic_error("Stream error in "
-                                "ZlibDecompressionStream::NextDecompress");
-      default:
-        throw std::logic_error("Unknown error in "
-                                "ZlibDecompressionStream::NextDecompress");
+        case Z_OK:
+          remainingLength -= availableSize;
+          inputBuffer += availableSize;
+          readBuffer(true);
+          availableSize =
+              std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), remainingLength);
+          zstream.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer));
+          zstream.avail_in = static_cast<uInt>(availableSize);
+          break;
+        case Z_STREAM_END:
+          break;
+        case Z_BUF_ERROR:
+          throw std::logic_error(
+              "Buffer error in "
+              "ZlibDecompressionStream::NextDecompress");
+        case Z_DATA_ERROR:
+          throw std::logic_error(
+              "Data error in "
+              "ZlibDecompressionStream::NextDecompress");
+        case Z_STREAM_ERROR:
+          throw std::logic_error(
+              "Stream error in "
+              "ZlibDecompressionStream::NextDecompress");
+        default:
+          throw std::logic_error(
+              "Unknown error in "
+              "ZlibDecompressionStream::NextDecompress");
       }
     } while (result != Z_STREAM_END);
     *size = static_cast<int>(outputDataBuffer.capacity() - zstream.avail_out);
@@ -727,44 +725,38 @@ DIAGNOSTIC_POP
     return result.str();
   }
 
-  class BlockDecompressionStream: public DecompressionStream {
-  public:
-    BlockDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                             size_t blockSize,
-                             MemoryPool& pool);
+  class BlockDecompressionStream : public DecompressionStream {
+   public:
+    BlockDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                             MemoryPool& pool, ReaderMetrics* metrics);
 
     virtual ~BlockDecompressionStream() override {}
     virtual std::string getName() const override = 0;
 
-  protected:
-    virtual void NextDecompress(const void** data,
-                                int* size,
-                                size_t availableSize) override;
+   protected:
+    virtual void NextDecompress(const void** data, int* size, size_t availableSize) override;
+
+    virtual uint64_t decompress(const char* input, uint64_t length, char* output,
+                                size_t maxOutputLength) = 0;
 
-    virtual uint64_t decompress(const char *input, uint64_t length,
-                                char *output, size_t maxOutputLength) = 0;
-  private:
+   private:
     // may need to stitch together multiple input buffers;
     // to give snappy a contiguous block
     DataBuffer<char> inputDataBuffer;
   };
 
-  BlockDecompressionStream::BlockDecompressionStream
-                   (std::unique_ptr<SeekableInputStream> inStream,
-                    size_t blockSize,
-                    MemoryPool& _pool
-                    ) : DecompressionStream
-                            (std::move(inStream), blockSize, _pool),
-                        inputDataBuffer(pool, blockSize) {
-  }
-
+  BlockDecompressionStream::BlockDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
+                                                     size_t blockSize, MemoryPool& _pool,
+                                                     ReaderMetrics* _metrics)
+      : DecompressionStream(std::move(inStream), blockSize, _pool, _metrics),
+        inputDataBuffer(pool, blockSize) {}
 
   void BlockDecompressionStream::NextDecompress(const void** data, int* size,
-      size_t availableSize) {
+                                                size_t availableSize) {
     // Get contiguous bytes of compressed block.
-    const char *compressed = inputBuffer;
+    const char* compressed = inputBuffer;
     if (remainingLength == availableSize) {
-        inputBuffer += availableSize;
+      inputBuffer += availableSize;
     } else {
       // Did not read enough from input.
       if (inputDataBuffer.capacity() < remainingLength) {
@@ -774,19 +766,16 @@ DIAGNOSTIC_POP
       inputBuffer += availableSize;
       compressed = inputDataBuffer.data();
 
-      for (size_t pos = availableSize; pos < remainingLength; ) {
+      for (size_t pos = availableSize; pos < remainingLength;) {
         readBuffer(true);
         size_t avail =
-            std::min(static_cast<size_t>(inputBufferEnd -
-                                          inputBuffer),
-                      remainingLength - pos);
+            std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), remainingLength - pos);
         ::memcpy(inputDataBuffer.data() + pos, inputBuffer, avail);
         pos += avail;
         inputBuffer += avail;
       }
     }
-    outputBufferLength = decompress(compressed, remainingLength,
-                                    outputDataBuffer.data(),
+    outputBufferLength = decompress(compressed, remainingLength, outputDataBuffer.data(),
                                     outputDataBuffer.capacity());
     remainingLength = 0;
     state = DECOMPRESS_HEADER;
@@ -796,15 +785,11 @@ DIAGNOSTIC_POP
     outputBufferLength = 0;
   }
 
-  class SnappyDecompressionStream: public BlockDecompressionStream {
-  public:
-    SnappyDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                              size_t blockSize,
-                              MemoryPool& _pool
-                              ): BlockDecompressionStream
-                                 (std::move(inStream),
-                                  blockSize,
-                                  _pool) {
+  class SnappyDecompressionStream : public BlockDecompressionStream {
+   public:
+    SnappyDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                              MemoryPool& _pool, ReaderMetrics* _metrics)
+        : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) {
       // PASS
     }
 
@@ -814,15 +799,12 @@ DIAGNOSTIC_POP
       return result.str();
     }
 
-  protected:
-    virtual uint64_t decompress(const char *input, uint64_t length,
-                                char *output, size_t maxOutputLength
-                                ) override;
+   protected:
+    virtual uint64_t decompress(const char* input, uint64_t length, char* output,
+                                size_t maxOutputLength) override;
   };
 
-  uint64_t SnappyDecompressionStream::decompress(const char *_input,
-                                                 uint64_t length,
-                                                 char *output,
+  uint64_t SnappyDecompressionStream::decompress(const char* _input, uint64_t length, char* output,
                                                  size_t maxOutputLength) {
     size_t outLength;
     if (!snappy::GetUncompressedLength(_input, length, &outLength)) {
@@ -839,15 +821,11 @@ DIAGNOSTIC_POP
     return outLength;
   }
 
-  class LzoDecompressionStream: public BlockDecompressionStream {
-  public:
-    LzoDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                           size_t blockSize,
-                           MemoryPool& _pool
-                           ): BlockDecompressionStream
-                                  (std::move(inStream),
-                                   blockSize,
-                                   _pool) {
+  class LzoDecompressionStream : public BlockDecompressionStream {
+   public:
+    LzoDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                           MemoryPool& _pool, ReaderMetrics* _metrics)
+        : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) {
       // PASS
     }
 
@@ -857,29 +835,21 @@ DIAGNOSTIC_POP
       return result.str();
     }
 
-  protected:
-    virtual uint64_t decompress(const char *input, uint64_t length,
-                                char *output, size_t maxOutputLength
-                                ) override;
+   protected:
+    virtual uint64_t decompress(const char* input, uint64_t length, char* output,
+                                size_t maxOutputLength) override;
   };
 
-  uint64_t LzoDecompressionStream::decompress(const char *inputPtr,
-                                              uint64_t length,
-                                              char *output,
+  uint64_t LzoDecompressionStream::decompress(const char* inputPtr, uint64_t length, char* output,
                                               size_t maxOutputLength) {
-    return lzoDecompress(inputPtr, inputPtr + length, output,
-                         output + maxOutputLength);
+    return lzoDecompress(inputPtr, inputPtr + length, output, output + maxOutputLength);
   }
 
-  class Lz4DecompressionStream: public BlockDecompressionStream {
-  public:
-    Lz4DecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                           size_t blockSize,
-                           MemoryPool& _pool
-                           ): BlockDecompressionStream
-                              (std::move(inStream),
-                               blockSize,
-                               _pool) {
+  class Lz4DecompressionStream : public BlockDecompressionStream {
+   public:
+    Lz4DecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                           MemoryPool& _pool, ReaderMetrics* _metrics)
+        : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) {
       // PASS
     }
 
@@ -889,15 +859,12 @@ DIAGNOSTIC_POP
       return result.str();
     }
 
-  protected:
-    virtual uint64_t decompress(const char *input, uint64_t length,
-                                char *output, size_t maxOutputLength
-                                ) override;
+   protected:
+    virtual uint64_t decompress(const char* input, uint64_t length, char* output,
+                                size_t maxOutputLength) override;
   };
 
-  uint64_t Lz4DecompressionStream::decompress(const char *inputPtr,
-                                              uint64_t length,
-                                              char *output,
+  uint64_t Lz4DecompressionStream::decompress(const char* inputPtr, uint64_t length, char* output,
                                               size_t maxOutputLength) {
     int result = LZ4_decompress_safe(inputPtr, output, static_cast<int>(length),
                                      static_cast<int>(maxOutputLength));
@@ -910,26 +877,20 @@ DIAGNOSTIC_POP
   /**
    * Block compression base class
    */
-  class BlockCompressionStream: public CompressionStreamBase {
-  public:
-    BlockCompressionStream(OutputStream * outStream,
-                           int compressionLevel,
-                           uint64_t capacity,
-                           uint64_t blockSize,
-                           MemoryPool& pool)
-                           : CompressionStreamBase(outStream,
-                                                   compressionLevel,
-                                                   capacity,
-                                                   blockSize,
-                                                   pool)
-                           , compressorBuffer(pool) {
+  class BlockCompressionStream : public CompressionStreamBase {
+   public:
+    BlockCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                           uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics)
+        : CompressionStreamBase(outStream, compressionLevel, capacity, blockSize, pool, metrics),
+          compressorBuffer(pool) {
       // PASS
     }
 
-    virtual bool Next(void** data, int*size) override;
+    virtual bool Next(void** data, int* size) override;
+    virtual void suppress() override;
     virtual std::string getName() const override = 0;
 
-  protected:
+   protected:
     // compresses a block and returns the compressed size
     virtual uint64_t doBlockCompression() = 0;
 
@@ -941,50 +902,27 @@ DIAGNOSTIC_POP
     DataBuffer<unsigned char> compressorBuffer;
   };
 
-  bool BlockCompressionStream::Next(void** data, int*size) {
+  bool BlockCompressionStream::Next(void** data, int* size) {
     if (bufferSize != 0) {
       ensureHeader();
 
       // perform compression
       size_t totalCompressedSize = doBlockCompression();
 
-      const unsigned char * dataToWrite = nullptr;
+      const unsigned char* dataToWrite = nullptr;
       int totalSizeToWrite = 0;
-      char * header = outputBuffer + outputPosition - 3;
 
       if (totalCompressedSize >= static_cast<size_t>(bufferSize)) {
-        writeHeader(header, static_cast<size_t>(bufferSize), true);
+        writeHeader(static_cast<size_t>(bufferSize), true);
         dataToWrite = rawInputBuffer.data();
         totalSizeToWrite = bufferSize;
       } else {
-        writeHeader(header, totalCompressedSize, false);
+        writeHeader(totalCompressedSize, false);
         dataToWrite = compressorBuffer.data();
         totalSizeToWrite = static_cast<int>(totalCompressedSize);
       }
 
-      char * dst = header + 3;
-      while (totalSizeToWrite > 0) {
-        if (outputPosition == outputSize) {
-          if (!BufferedOutputStream::Next(reinterpret_cast<void **>(&outputBuffer),
-                                          &outputSize)) {
-            throw std::logic_error(
-              "Failed to get next output buffer from output stream.");
-          }
-          outputPosition = 0;
-          dst = outputBuffer;
-        } else if (outputPosition > outputSize) {
-          // this will unlikely happen, but we have seen a few on zstd v1.1.0
-          throw std::logic_error("Write to an out-of-bound place!");
-        }
-
-        int sizeToWrite = std::min(totalSizeToWrite, outputSize - outputPosition);
-        std::memcpy(dst, dataToWrite, static_cast<size_t>(sizeToWrite));
-
-        outputPosition += sizeToWrite;
-        dataToWrite += sizeToWrite;
-        totalSizeToWrite -= sizeToWrite;
-        dst += sizeToWrite;
-      }
+      writeData(dataToWrite, totalSizeToWrite);
     }
 
     *data = rawInputBuffer.data();
@@ -995,52 +933,48 @@ DIAGNOSTIC_POP
     return true;
   }
 
+  void BlockCompressionStream::suppress() {
+    compressorBuffer.resize(0);
+    CompressionStreamBase::suppress();
+  }
+
   /**
    * LZ4 block compression
    */
-  class Lz4CompressionSteam: public BlockCompressionStream {
-  public:
-    Lz4CompressionSteam(OutputStream * outStream,
-                        int compressionLevel,
-                        uint64_t capacity,
-                        uint64_t blockSize,
-                        MemoryPool& pool)
-                        : BlockCompressionStream(outStream,
-                                                 compressionLevel,
-                                                 capacity,
-                                                 blockSize,
-                                                 pool) {
+  class Lz4CompressionSteam : public BlockCompressionStream {
+   public:
+    Lz4CompressionSteam(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                        uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics)
+        : BlockCompressionStream(outStream, compressionLevel, capacity, blockSize, pool, metrics) {
       this->init();
     }
 
     virtual std::string getName() const override {
       return "Lz4CompressionStream";
     }
-    
+
     virtual ~Lz4CompressionSteam() override {
       this->end();
     }
 
-  protected:
+   protected:
     virtual uint64_t doBlockCompression() override;
 
     virtual uint64_t estimateMaxCompressionSize() override {
       return static_cast<uint64_t>(LZ4_compressBound(bufferSize));
     }
 
-  private:
+   private:
     void init();
     void end();
-    LZ4_stream_t *state;
+    LZ4_stream_t* state;
   };
 
   uint64_t Lz4CompressionSteam::doBlockCompression() {
-    int result = LZ4_compress_fast_extState(static_cast<void*>(state),
-                                            reinterpret_cast<const char*>(rawInputBuffer.data()),
-                                            reinterpret_cast<char*>(compressorBuffer.data()),
-                                            bufferSize,
-                                            static_cast<int>(compressorBuffer.size()),
-                                            level);
+    int result = LZ4_compress_fast_extState(
+        static_cast<void*>(state), reinterpret_cast<const char*>(rawInputBuffer.data()),
+        reinterpret_cast<char*>(compressorBuffer.data()), bufferSize,
+        static_cast<int>(compressorBuffer.size()), level);
     if (result == 0) {
       throw std::runtime_error("Error during block compression using lz4.");
     }
@@ -1062,34 +996,25 @@ DIAGNOSTIC_POP
   /**
    * Snappy block compression
    */
-  class SnappyCompressionStream: public BlockCompressionStream {
-  public:
-    SnappyCompressionStream(OutputStream * outStream,
-                        int compressionLevel,
-                        uint64_t capacity,
-                        uint64_t blockSize,
-                        MemoryPool& pool)
-                        : BlockCompressionStream(outStream,
-                                                 compressionLevel,
-                                                 capacity,
-                                                 blockSize,
-                                                 pool) {
-    }
+  class SnappyCompressionStream : public BlockCompressionStream {
+   public:
+    SnappyCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                            uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics)
+        : BlockCompressionStream(outStream, compressionLevel, capacity, blockSize, pool, metrics) {}
 
     virtual std::string getName() const override {
       return "SnappyCompressionStream";
     }
-    
+
     virtual ~SnappyCompressionStream() override {
       // PASS
     }
 
-  protected:
+   protected:
     virtual uint64_t doBlockCompression() override;
 
     virtual uint64_t estimateMaxCompressionSize() override {
-      return static_cast<uint64_t>
-        (snappy::MaxCompressedLength(static_cast<size_t>(bufferSize)));
+      return static_cast<uint64_t>(snappy::MaxCompressedLength(static_cast<size_t>(bufferSize)));
     }
   };
 
@@ -1097,92 +1022,75 @@ DIAGNOSTIC_POP
     size_t compressedLength;
     snappy::RawCompress(reinterpret_cast<const char*>(rawInputBuffer.data()),
                         static_cast<size_t>(bufferSize),
-                        reinterpret_cast<char*>(compressorBuffer.data()),
-                        &compressedLength);
+                        reinterpret_cast<char*>(compressorBuffer.data()), &compressedLength);
     return static_cast<uint64_t>(compressedLength);
   }
 
   /**
    * ZSTD block compression
    */
-  class ZSTDCompressionStream: public BlockCompressionStream {
-  public:
-    ZSTDCompressionStream(OutputStream * outStream,
-                          int compressionLevel,
-                          uint64_t capacity,
-                          uint64_t blockSize,
-                          MemoryPool& pool)
-                          : BlockCompressionStream(outStream,
-                                                   compressionLevel,
-                                                   capacity,
-                                                   blockSize,
-                                                   pool) {
+  class ZSTDCompressionStream : public BlockCompressionStream {
+   public:
+    ZSTDCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity,
+                          uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics)
+        : BlockCompressionStream(outStream, compressionLevel, capacity, blockSize, pool, metrics) {
       this->init();
     }
 
     virtual std::string getName() const override {
       return "ZstdCompressionStream";
     }
-    
+
     virtual ~ZSTDCompressionStream() override {
       this->end();
     }
 
-  protected:
+   protected:
     virtual uint64_t doBlockCompression() override;
 
     virtual uint64_t estimateMaxCompressionSize() override {
       return ZSTD_compressBound(static_cast<size_t>(bufferSize));
     }
-    
-  private:
+
+   private:
     void init();
     void end();
-    ZSTD_CCtx *cctx;
+    ZSTD_CCtx* cctx;
   };
 
   uint64_t ZSTDCompressionStream::doBlockCompression() {
-    return ZSTD_compressCCtx(cctx,
-                             compressorBuffer.data(),
-                             compressorBuffer.size(),
-                             rawInputBuffer.data(),
-                             static_cast<size_t>(bufferSize),
-                             level);
+    return ZSTD_compressCCtx(cctx, compressorBuffer.data(), compressorBuffer.size(),
+                             rawInputBuffer.data(), static_cast<size_t>(bufferSize), level);
   }
-  
-DIAGNOSTIC_PUSH
+
+  DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
   DIAGNOSTIC_IGNORE("-Wold-style-cast")
 #endif
 
   void ZSTDCompressionStream::init() {
-
     cctx = ZSTD_createCCtx();
     if (!cctx) {
       throw std::runtime_error("Error while calling ZSTD_createCCtx() for zstd.");
     }
   }
 
-
   void ZSTDCompressionStream::end() {
     (void)ZSTD_freeCCtx(cctx);
     cctx = nullptr;
   }
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_POP  // closes the -Wold-style-cast region opened before init()
 
   /**
    * ZSTD block decompression
    */
-  class ZSTDDecompressionStream: public BlockDecompressionStream {
-  public:
-    ZSTDDecompressionStream(std::unique_ptr<SeekableInputStream> inStream,
-                            size_t blockSize,
-                            MemoryPool& _pool)
-                            : BlockDecompressionStream(std::move(inStream),
-                                                       blockSize,
-                                                       _pool) {
+  class ZSTDDecompressionStream : public BlockDecompressionStream {
+   public:
+    ZSTDDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, size_t blockSize,
+                            MemoryPool& _pool, ReaderMetrics* _metrics)
+        : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) {
       this->init();
     }
 
@@ -1196,127 +1104,106 @@ DIAGNOSTIC_PUSH
       return result.str();
     }
 
-  protected:
-    virtual uint64_t decompress(const char *input,
-                                uint64_t length,
-                                char *output,
+   protected:
+    virtual uint64_t decompress(const char* input, uint64_t length, char* output,
                                 size_t maxOutputLength) override;
 
-  private:
+   private:
     void init();
     void end();
-    ZSTD_DCtx *dctx;
+    ZSTD_DCtx* dctx;
   };
 
-  uint64_t ZSTDDecompressionStream::decompress(const char *inputPtr,
-                                               uint64_t length,
-                                               char *output,
+  uint64_t ZSTDDecompressionStream::decompress(const char* inputPtr, uint64_t length, char* output,
                                                size_t maxOutputLength) {
-    return static_cast<uint64_t>(ZSTD_decompressDCtx(dctx,
-                                                     output,
-                                                     maxOutputLength,
-                                                     inputPtr,
-                                                     length));
+    return static_cast<uint64_t>(
+        ZSTD_decompressDCtx(dctx, output, maxOutputLength, inputPtr, length));
   }
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
   DIAGNOSTIC_IGNORE("-Wold-style-cast")
 #endif
 
   void ZSTDDecompressionStream::init() {
-
     dctx = ZSTD_createDCtx();
     if (!dctx) {
       throw std::runtime_error("Error while calling ZSTD_createDCtx() for zstd.");
     }
   }
 
-
   void ZSTDDecompressionStream::end() {
     (void)ZSTD_freeDCtx(dctx);
     dctx = nullptr;
   }
 
-DIAGNOSTIC_PUSH
+  DIAGNOSTIC_POP  // closes the -Wold-style-cast region opened before init()
 
-  std::unique_ptr<BufferedOutputStream>
-     createCompressor(
-                      CompressionKind kind,
-                      OutputStream * outStream,
-                      CompressionStrategy strategy,
-                      uint64_t bufferCapacity,
-                      uint64_t compressionBlockSize,
-                      MemoryPool& pool) {
+  std::unique_ptr<BufferedOutputStream> createCompressor(CompressionKind kind,
+                                                         OutputStream* outStream,
+                                                         CompressionStrategy strategy,
+                                                         uint64_t bufferCapacity,
+                                                         uint64_t compressionBlockSize,
+                                                         MemoryPool& pool, WriterMetrics* metrics) {
     switch (static_cast<int64_t>(kind)) {
-    case CompressionKind_NONE: {
-      return std::unique_ptr<BufferedOutputStream>
-        (new BufferedOutputStream(
-                pool, outStream, bufferCapacity, compressionBlockSize));
-    }
-    case CompressionKind_ZLIB: {
-      int level = (strategy == CompressionStrategy_SPEED) ?
-              Z_BEST_SPEED + 1 : Z_DEFAULT_COMPRESSION;
-      return std::unique_ptr<BufferedOutputStream>
-        (new ZlibCompressionStream(
-                outStream, level, bufferCapacity, compressionBlockSize, pool));
-    }
-    case CompressionKind_ZSTD: {
-      int level = (strategy == CompressionStrategy_SPEED) ?
-              1 : ZSTD_CLEVEL_DEFAULT;
-      return std::unique_ptr<BufferedOutputStream>
-        (new ZSTDCompressionStream(
-          outStream, level, bufferCapacity, compressionBlockSize, pool));
-    }
-    case CompressionKind_LZ4: {
-      int level = (strategy == CompressionStrategy_SPEED) ?
-              LZ4_ACCELERATION_MAX : LZ4_ACCELERATION_DEFAULT;
-      return std::unique_ptr<BufferedOutputStream>
-        (new Lz4CompressionSteam(
-          outStream, level, bufferCapacity, compressionBlockSize, pool));
-    }
-    case CompressionKind_SNAPPY: {
-      int level = 0;
-      return std::unique_ptr<BufferedOutputStream>
-        (new SnappyCompressionStream(
-          outStream, level, bufferCapacity, compressionBlockSize, pool));
-    }
-    case CompressionKind_LZO:
-    default:
-      throw NotImplementedYet("compression codec");
+      case CompressionKind_NONE: {
+        return std::make_unique<BufferedOutputStream>(pool, outStream, bufferCapacity,
+                                                      compressionBlockSize, metrics);
+      }
+      case CompressionKind_ZLIB: {
+        int level =
+            (strategy == CompressionStrategy_SPEED) ? Z_BEST_SPEED + 1 : Z_DEFAULT_COMPRESSION;
+        return std::make_unique<ZlibCompressionStream>(outStream, level, bufferCapacity,
+                                                       compressionBlockSize, pool, metrics);
+      }
+      case CompressionKind_ZSTD: {
+        int level = (strategy == CompressionStrategy_SPEED) ? 1 : ZSTD_CLEVEL_DEFAULT;
+        return std::make_unique<ZSTDCompressionStream>(outStream, level, bufferCapacity,
+                                                       compressionBlockSize, pool, metrics);
+      }
+      case CompressionKind_LZ4: {
+        int level = (strategy == CompressionStrategy_SPEED) ? LZ4_ACCELERATION_MAX
+                                                            : LZ4_ACCELERATION_DEFAULT;
+        return std::make_unique<Lz4CompressionSteam>(outStream, level, bufferCapacity,
+                                                     compressionBlockSize, pool, metrics);
+      }
+      case CompressionKind_SNAPPY: {
+        int level = 0;
+        return std::make_unique<SnappyCompressionStream>(outStream, level, bufferCapacity,
+                                                         compressionBlockSize, pool, metrics);
+      }
+      case CompressionKind_LZO:
+      default:
+        throw NotImplementedYet("compression codec");
     }
   }
 
-  std::unique_ptr<SeekableInputStream>
-     createDecompressor(CompressionKind kind,
-                        std::unique_ptr<SeekableInputStream> input,
-                        uint64_t blockSize,
-                        MemoryPool& pool) {
+  std::unique_ptr<SeekableInputStream> createDecompressor(
+      CompressionKind kind, std::unique_ptr<SeekableInputStream> input, uint64_t blockSize,
+      MemoryPool& pool, ReaderMetrics* metrics) {
     switch (static_cast<int64_t>(kind)) {
-    case CompressionKind_NONE:
-      return REDUNDANT_MOVE(input);
-    case CompressionKind_ZLIB:
-      return std::unique_ptr<SeekableInputStream>
-        (new ZlibDecompressionStream(std::move(input), blockSize, pool));
-    case CompressionKind_SNAPPY:
-      return std::unique_ptr<SeekableInputStream>
-        (new SnappyDecompressionStream(std::move(input), blockSize, pool));
-    case CompressionKind_LZO:
-      return std::unique_ptr<SeekableInputStream>
-        (new LzoDecompressionStream(std::move(input), blockSize, pool));
-    case CompressionKind_LZ4:
-      return std::unique_ptr<SeekableInputStream>
-        (new Lz4DecompressionStream(std::move(input), blockSize, pool));
-    case CompressionKind_ZSTD:
-      return std::unique_ptr<SeekableInputStream>
-        (new ZSTDDecompressionStream(std::move(input), blockSize, pool));
-    default: {
-      std::ostringstream buffer;
-      buffer << "Unknown compression codec " << kind;
-      throw NotImplementedYet(buffer.str());
-    }
+      case CompressionKind_NONE:
+        return input;
+      case CompressionKind_ZLIB:
+        return std::make_unique<ZlibDecompressionStream>(std::move(input), blockSize, pool,
+                                                         metrics);
+      case CompressionKind_SNAPPY:
+        return std::make_unique<SnappyDecompressionStream>(std::move(input), blockSize, pool,
+                                                           metrics);
+      case CompressionKind_LZO:
+        return std::make_unique<LzoDecompressionStream>(std::move(input), blockSize, pool, metrics);
+      case CompressionKind_LZ4:
+        return std::make_unique<Lz4DecompressionStream>(std::move(input), blockSize, pool, metrics);
+      case CompressionKind_ZSTD:
+        return std::make_unique<ZSTDDecompressionStream>(std::move(input), blockSize, pool,
+                                                         metrics);
+      default: {
+        std::ostringstream buffer;
+        buffer << "Unknown compression codec " << kind;
+        throw NotImplementedYet(buffer.str());
+      }
     }
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Compression.hh b/contrib/libs/apache/orc/c++/src/Compression.hh
index ff79377d83..55b152dd63 100644
--- a/contrib/libs/apache/orc/c++/src/Compression.hh
+++ b/contrib/libs/apache/orc/c++/src/Compression.hh
@@ -30,12 +30,11 @@ namespace orc {
    * @param input the input stream that is the underlying source
    * @param bufferSize the maximum size of the buffer
    * @param pool the memory pool
+   * @param metrics the reader metrics
    */
-  std::unique_ptr<SeekableInputStream>
-     createDecompressor(CompressionKind kind,
-                        std::unique_ptr<SeekableInputStream> input,
-                        uint64_t bufferSize,
-                        MemoryPool& pool);
+  std::unique_ptr<SeekableInputStream> createDecompressor(
+      CompressionKind kind, std::unique_ptr<SeekableInputStream> input, uint64_t bufferSize,
+      MemoryPool& pool, ReaderMetrics* metrics);
 
   /**
    * Create a compressor for the given compression kind.
@@ -46,13 +45,12 @@ namespace orc {
    * @param compressionBlockSize compression buffer block size
    * @param pool the memory pool
    */
-  std::unique_ptr<BufferedOutputStream>
-     createCompressor(CompressionKind kind,
-                      OutputStream * outStream,
-                      CompressionStrategy strategy,
-                      uint64_t bufferCapacity,
-                      uint64_t compressionBlockSize,
-                      MemoryPool& pool);
-}
+  std::unique_ptr<BufferedOutputStream> createCompressor(CompressionKind kind,
+                                                         OutputStream* outStream,
+                                                         CompressionStrategy strategy,
+                                                         uint64_t bufferCapacity,
+                                                         uint64_t compressionBlockSize,
+                                                         MemoryPool& pool, WriterMetrics* metrics);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/ConvertColumnReader.cc b/contrib/libs/apache/orc/c++/src/ConvertColumnReader.cc
new file mode 100644
index 0000000000..459cafa1a0
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/ConvertColumnReader.cc
@@ -0,0 +1,1001 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvertColumnReader.hh"
+
+namespace orc {
+
+  // Assume that we are using tight numeric vector batch
+  using BooleanVectorBatch = ByteVectorBatch;
+
+  ConvertColumnReader::ConvertColumnReader(const Type& _readType, const Type& fileType,
+                                           StripeStreams& stripe, bool _throwOnOverflow)
+      : ColumnReader(_readType, stripe), readType(_readType), throwOnOverflow(_throwOnOverflow) {
+    reader = buildReader(fileType, stripe, /*useTightNumericVector=*/true,
+                         /*throwOnOverflow=*/false, /*convertToReadType*/ false);
+    data =
+        fileType.createRowBatch(0, memoryPool, /*encoded=*/false, /*useTightNumericVector=*/true);
+  }
+
+  void ConvertColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) {
+    reader->next(*data, numValues, notNull);
+    rowBatch.resize(data->capacity);
+    rowBatch.numElements = data->numElements;
+    rowBatch.hasNulls = data->hasNulls;
+    if (!rowBatch.hasNulls) {
+      memset(rowBatch.notNull.data(), 1, data->notNull.size());
+    } else {
+      memcpy(rowBatch.notNull.data(), data->notNull.data(), data->notNull.size());
+    }
+  }
+
+  uint64_t ConvertColumnReader::skip(uint64_t numValues) {
+    return reader->skip(numValues);
+  }
+
+  void ConvertColumnReader::seekToRowGroup(
+      std::unordered_map<uint64_t, PositionProvider>& positions) {
+    reader->seekToRowGroup(positions);
+  }
+
+  static inline bool canFitInLong(double value) {  // true only if value is within int64_t range
+    constexpr double MIN_LONG_AS_DOUBLE = -0x1p63;  // -2^63
+    constexpr double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;  // 2^63
+    return ((MIN_LONG_AS_DOUBLE - value < 1.0) && (value < MAX_LONG_AS_DOUBLE_PLUS_ONE));
+  }
+
+  template <typename FileType, typename ReadType>
+  static inline void handleOverflow(ColumnVectorBatch& dstBatch, uint64_t idx, bool shouldThrow) {
+    if (!shouldThrow) {
+      dstBatch.notNull.data()[idx] = 0;
+      dstBatch.hasNulls = true;
+    } else {
+      std::ostringstream ss;
+      ss << "Overflow when convert from " << typeid(FileType).name() << " to "
+         << typeid(ReadType).name();
+      throw SchemaEvolutionError(ss.str());
+    }
+  }
+
+  // Narrow inputLong to ReadType; returns false if the value does not survive the round trip.
+  template <typename ReadType>
+  static bool downCastToInteger(ReadType& dstValue, int64_t inputLong) {
+    dstValue = static_cast<ReadType>(inputLong);
+    if constexpr (std::is_same<ReadType, int64_t>::value) {
+      return true;  // same type as the input: nothing can be lost
+    }
+    if (static_cast<int64_t>(dstValue) != inputLong) {
+      return false;
+    }
+    return true;
+  }
+
+  template <typename DestBatchPtrType>
+  static inline DestBatchPtrType SafeCastBatchTo(ColumnVectorBatch* batch) {
+    auto result = dynamic_cast<DestBatchPtrType>(batch);
+    if (result == nullptr) {
+      std::ostringstream ss;
+      ss << "Bad cast when convert from ColumnVectorBatch to "
+         << typeid(typename std::remove_const<
+                       typename std::remove_pointer<DestBatchPtrType>::type>::type)
+                .name();
+      throw InvalidArgument(ss.str());
+    }
+    return result;
+  }
+
+  // Converts srcValue to ReadType; on overflow marks the slot null, or throws if shouldThrow.
+  template <typename ReadType, typename FileType>
+  static inline void convertNumericElement(const FileType& srcValue, ReadType& destValue,
+                                           ColumnVectorBatch& destBatch, uint64_t idx,
+                                           bool shouldThrow) {
+    constexpr bool isFileTypeFloatingPoint(std::is_floating_point<FileType>::value);
+    constexpr bool isReadTypeFloatingPoint(std::is_floating_point<ReadType>::value);
+    // Cast to int64_t only after the range check: an out-of-range or NaN float cast is UB.
+    if (isFileTypeFloatingPoint) {
+      if (isReadTypeFloatingPoint) {
+        destValue = static_cast<ReadType>(srcValue);
+      } else {
+        if (!canFitInLong(static_cast<double>(srcValue)) ||
+            !downCastToInteger(destValue, static_cast<int64_t>(srcValue))) {
+          handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
+        }
+      }
+    } else {
+      if (isReadTypeFloatingPoint) {
+        destValue = static_cast<ReadType>(srcValue);
+        if (destValue != destValue) {  // check is NaN
+          handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
+        }
+      } else {
+        if (!downCastToInteger(destValue, static_cast<int64_t>(srcValue))) {
+          handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
+        }
+      }
+    }
+  }
+
+  // { boolean, byte, short, int, long, float, double } ->
+  // { byte, short, int, long, float, double }
+  template <typename FileTypeBatch, typename ReadTypeBatch, typename ReadType>
+  class NumericConvertColumnReader : public ConvertColumnReader {
+   public:
+    NumericConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                               bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+      const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      if (rowBatch.hasNulls) {
+        for (uint64_t i = 0; i < rowBatch.numElements; ++i) {
+          if (rowBatch.notNull[i]) {
+            convertNumericElement<ReadType>(srcBatch.data[i], dstBatch.data[i], rowBatch, i,
+                                            throwOnOverflow);
+          }
+        }
+      } else {
+        for (uint64_t i = 0; i < rowBatch.numElements; ++i) {
+          convertNumericElement<ReadType>(srcBatch.data[i], dstBatch.data[i], rowBatch, i,
+                                          throwOnOverflow);
+        }
+      }
+    }
+  };
+
+  // { boolean, byte, short, int, long, float, double } -> { boolean }
+  template <typename FileTypeBatch>
+  class NumericConvertColumnReader<FileTypeBatch, BooleanVectorBatch, bool>
+      : public ConvertColumnReader {
+   public:
+    NumericConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                               bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+      const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<BooleanVectorBatch*>(&rowBatch);
+      if (rowBatch.hasNulls) {
+        for (uint64_t i = 0; i < rowBatch.numElements; ++i) {
+          if (rowBatch.notNull[i]) {
+            dstBatch.data[i] = (static_cast<int64_t>(srcBatch.data[i]) == 0 ? 0 : 1);
+          }
+        }
+      } else {
+        for (uint64_t i = 0; i < rowBatch.numElements; ++i) {
+          dstBatch.data[i] = (static_cast<int64_t>(srcBatch.data[i]) == 0 ? 0 : 1);
+        }
+      }
+    }
+  };
+
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& fileType,
+                                       StripeStreams& stripe, bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
+
+    virtual uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache the converted strings in strBuffer; the returned length sizes the blob below
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // concatenate the string values into the blob buffer of the vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& fileType,
+                                       StripeStreams& stripe, bool _throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() == CHAR || readType.getKind() == VARCHAR) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: " +
+                                     std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override;
+
+   private:
+    std::string trueValue;
+    std::string falseValue;
+  };
+
+  uint64_t BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& rowBatch,
+                                                                  uint64_t numValues) {
+    uint64_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const BooleanVectorBatch*>(data.get());
+    // cast the bool value to string
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue);
+        size += strBuffer[i].size();
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch>
+  class NumericToStringVariantColumnReader : public ConvertToStringVariantColumnReader {
+   public:
+    NumericToStringVariantColumnReader(const Type& _readType, const Type& fileType,
+                                       StripeStreams& stripe, bool _throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+    uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override;
+  };
+
+  template <typename FileTypeBatch>
+  uint64_t NumericToStringVariantColumnReader<FileTypeBatch>::convertToStrBuffer(
+      ColumnVectorBatch& rowBatch, uint64_t numValues) {
+    uint64_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+    if (readType.getKind() == STRING) {
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          size += strBuffer[i].size();
+        }
+      }
+    } else if (readType.getKind() == VARCHAR) {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, i, throwOnOverflow);
+          } else {
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    } else if (readType.getKind() == CHAR) {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, i, throwOnOverflow);
+          } else {
+            strBuffer[i].resize(maxLength, ' ');
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    } else {
+      throw SchemaEvolutionError("Invalid type for numeric to string conversion: " +
+                                 readType.toString());
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch, typename ReadTypeBatch, bool isFloatingFileType>
+  class NumericToDecimalColumnReader : public ConvertColumnReader {
+   public:
+    NumericToDecimalColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                                 bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+      precision = static_cast<int32_t>(readType.getPrecision());
+      scale = static_cast<int32_t>(readType.getScale());
+      bool overflow = false;  // NOTE(review): never checked after the call below
+      upperBound = scaleUpInt128ByPowerOfTen(1, precision, overflow);
+    }
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      dstBatch.precision = precision;
+      dstBatch.scale = scale;
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          if constexpr (isFloatingFileType) {
+            convertDoubleToDecimal(dstBatch, i, srcBatch.data[i]);
+          } else {
+            convertIntegerToDecimal(dstBatch, i, srcBatch.data[i]);
+          }
+        }
+      }
+    }
+
+   private:
+    template <typename SrcType>
+    void convertDoubleToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, SrcType value) {
+      const auto result = convertDecimal(value, precision, scale);
+      Int128 i128 = result.second;
+      if (result.first) {
+        handleOverflow<SrcType, decltype(dstBatch.values[idx])>(dstBatch, idx, throwOnOverflow);
+        return;
+      }
+
+      if constexpr (std::is_same<ReadTypeBatch, Decimal64VectorBatch>::value) {
+        if (!i128.fitsInLong()) {
+          handleOverflow<SrcType, decltype(dstBatch.values[idx])>(dstBatch, idx, throwOnOverflow);
+        } else {
+          dstBatch.values[idx] = i128.toLong();
+        }
+      } else {
+        dstBatch.values[idx] = i128;
+      }
+    }
+
+    template <typename SrcType>
+    void convertIntegerToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, SrcType value) {
+      int fromScale = 0;
+      auto result = convertDecimal(value, fromScale, precision, scale);
+      if (result.first) {
+        handleOverflow<SrcType, decltype(dstBatch.values[idx])>(dstBatch, idx, throwOnOverflow);
+      } else {
+        if constexpr (std::is_same<ReadTypeBatch, Decimal64VectorBatch>::value) {
+          if (!result.second.fitsInLong()) {
+            handleOverflow<SrcType, decltype(dstBatch.values[idx])>(dstBatch, idx, throwOnOverflow);
+          } else {
+            dstBatch.values[idx] = result.second.toLong();
+          }
+        } else {
+          dstBatch.values[idx] = result.second;
+        }
+      }
+    }
+
+    int32_t precision;
+    int32_t scale;
+    int64_t scaleMultiplier;  // NOTE(review): never initialized or read in this class
+    Int128 upperBound;  // NOTE(review): assigned in the constructor, never read in this class
+  };
+
+  class ConvertToTimestampColumnReader : public ConvertColumnReader {  // base of the *ToTimestamp readers
+   public:
+    ConvertToTimestampColumnReader(const Type& _readType, const Type& fileType,
+                                   StripeStreams& stripe, bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow),
+          readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? &getTimezoneByName("GMT")
+                                                                 : &stripe.getReaderTimezone()),
+          needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {}  // pointer identity; relies on readerTimezone being declared (so initialized) first
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;  // defined out of line
+
+   protected:
+    const orc::Timezone* readerTimezone;  // GMT for TIMESTAMP_INSTANT reads, otherwise the stripe's reader timezone
+    const bool needConvertTimezone;  // true when readerTimezone is not the GMT timezone object
+  };
+
+  // Out-of-line on purpose (avoids emitting the vtable in every translation unit); only delegates.
+  void ConvertToTimestampColumnReader::next(ColumnVectorBatch& rowBatch, uint64_t numValues,
+                                            char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+  }
+
+  // Reads a numeric file column (decoded into FileTypeBatch) and hands it to the caller as a
+  // TimestampVectorBatch. The per-value work is done by convertToTimestamp().
+  template <typename FileTypeBatch>
+  class NumericToTimestampColumnReader : public ConvertToTimestampColumnReader {
+   public:
+    NumericToTimestampColumnReader(const Type& _readType, const Type& fileType,
+                                   StripeStreams& stripe, bool _throwOnOverflow)
+        : ConvertToTimestampColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    // Runs the base-class next() and then converts each non-null slot of `data`.
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertToTimestampColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& fileBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& timestampBatch = *SafeCastBatchTo<TimestampVectorBatch*>(&rowBatch);
+      for (uint64_t row = 0; row != numValues; ++row) {
+        // hasNulls is evaluated per row: a conversion may report an overflow mid-loop.
+        if (rowBatch.hasNulls && !rowBatch.notNull[row]) {
+          continue;
+        }
+        convertToTimestamp(timestampBatch, row, fileBatch.data[row]);
+      }
+    }
+
+   private:
+    // Writes one converted value into slot `idx` of `dstBatch`.
+    template <typename FileType>
+    void convertToTimestamp(TimestampVectorBatch& dstBatch, uint64_t idx, FileType value);
+  };
+
+  // Stores `value` into slot `idx` of `dstBatch`.
+  //
+  // Floating-point input: the integral part goes to `data` and the fractional part, scaled by
+  // 1e9, to `nanoseconds` (normalized so that it is never negative). NaN and values outside the
+  // int64_t range are reported through handleOverflow() and the slot is not written.
+  // Integral input: the value is stored in `data` and `nanoseconds` is set to 0.
+  // Finally `data` is passed through readerTimezone->convertFromUTC() when needConvertTimezone
+  // is set.
+  template <typename FileTypeBatch>
+  template <typename FileType>
+  void NumericToTimestampColumnReader<FileTypeBatch>::convertToTimestamp(
+      TimestampVectorBatch& dstBatch, uint64_t idx, FileType value) {
+    if constexpr (std::is_floating_point<FileType>::value) {
+      // -2^63 converts to FileType exactly, whereas int64_t max rounds up to 2^63, so the valid
+      // range is [-2^63, 2^63). The negated test also rejects NaN, for which every comparison is
+      // false. Casting anything outside that range to int64_t would be undefined behavior.
+      const FileType lowerBound = static_cast<FileType>(std::numeric_limits<int64_t>::min());
+      if (!(value >= lowerBound && value < -lowerBound)) {
+        handleOverflow<FileType, int64_t>(dstBatch, idx, throwOnOverflow);
+        return;
+      }
+      dstBatch.data[idx] = static_cast<int64_t>(value);
+      dstBatch.nanoseconds[idx] = static_cast<int32_t>(
+          static_cast<double>(value - static_cast<FileType>(dstBatch.data[idx])) * 1e9);
+      // The cast above truncates toward zero, so negative inputs leave a negative fraction;
+      // borrow one unit from `data` to keep nanoseconds non-negative.
+      if (dstBatch.nanoseconds[idx] < 0) {
+        dstBatch.data[idx] -= 1;
+        dstBatch.nanoseconds[idx] += static_cast<int32_t>(1e9);
+      }
+    } else {
+      dstBatch.data[idx] = value;
+      dstBatch.nanoseconds[idx] = 0;
+    }
+    if (needConvertTimezone) {
+      dstBatch.data[idx] = readerTimezone->convertFromUTC(dstBatch.data[idx]);
+    }
+  }
+
+  // Converts a decimal file column (Decimal64VectorBatch or Decimal128VectorBatch) into an
+  // integer or floating-point read column.
+  //
+  // Template parameters:
+  //   FileTypeBatch - decimal batch type the file column is decoded into
+  //   ReadTypeBatch - numeric batch type handed to the caller
+  //   ReadType      - element type of ReadTypeBatch; floating-point types take the
+  //                   convertDecimalToDouble() path, everything else convertDecimalToInteger()
+  template <typename FileTypeBatch, typename ReadTypeBatch, typename ReadType>
+  class DecimalToNumericColumnReader : public ConvertColumnReader {
+   public:
+    // Caches the scale of the decimal file type and precomputes 10^scale.
+    DecimalToNumericColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                                 bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow),
+          scale(static_cast<int32_t>(fileType.getScale())),
+          factor(1.0) {
+      // Accumulate in double: a 64-bit integer overflows once the scale exceeds 18, which is
+      // legal for 128-bit decimals. Powers of ten up to 10^22 are exact in double, so results
+      // for scales that previously worked are unchanged.
+      for (int32_t i = 0; i < scale; ++i) {
+        factor *= 10.0;
+      }
+    }
+
+    // Runs the base-class next() and then converts every non-null slot of `data`.
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      for (uint64_t i = 0; i < numValues; ++i) {
+        // hasNulls is re-read on purpose: the conversion may report an overflow mid-loop.
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          if constexpr (std::is_floating_point_v<ReadType>) {
+            convertDecimalToDouble(dstBatch, i, srcBatch);
+          } else {
+            convertDecimalToInteger(dstBatch, i, srcBatch);
+          }
+        }
+      }
+    }
+
+   private:
+    // Scales the stored value down by 10^scale and narrows it to ReadType. A result that does
+    // not fit into 64 bits is reported through handleOverflow(); the final narrowing is left to
+    // convertNumericElement().
+    void convertDecimalToInteger(ReadTypeBatch& dstBatch, uint64_t idx,
+                                 const FileTypeBatch& srcBatch) {
+      using FileType = decltype(srcBatch.values[idx]);
+      Int128 result = scaleDownInt128ByPowerOfTen(srcBatch.values[idx], scale);
+      if (!result.fitsInLong()) {
+        handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+        return;
+      }
+      convertNumericElement<ReadType, int64_t>(result.toLong(), dstBatch.data[idx], dstBatch, idx,
+                                               throwOnOverflow);
+    }
+
+    // Converts the unscaled value to floating point and divides it by 10^scale.
+    void convertDecimalToDouble(ReadTypeBatch& dstBatch, uint64_t idx,
+                                const FileTypeBatch& srcBatch) {
+      double doubleValue = Int128(srcBatch.values[idx]).toDouble();
+      dstBatch.data[idx] = static_cast<ReadType>(doubleValue) / static_cast<ReadType>(factor);
+    }
+
+    int32_t scale;  // scale of the decimal file type
+    double factor;  // 10^scale, used by the floating-point path
+  };
+
+  // Specialization for boolean read columns: a decimal equal to zero becomes 0, any other
+  // value becomes 1. Scale and overflow handling play no part here.
+  template <typename FileTypeBatch>
+  class DecimalToNumericColumnReader<FileTypeBatch, BooleanVectorBatch, bool>
+      : public ConvertColumnReader {
+   public:
+    DecimalToNumericColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                                 bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    // Runs the base-class next() and then maps every non-null decimal in `data` to 0 or 1.
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& decimalBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& boolBatch = *SafeCastBatchTo<BooleanVectorBatch*>(&rowBatch);
+      for (uint64_t row = 0; row != numValues; ++row) {
+        if (rowBatch.hasNulls && !rowBatch.notNull[row]) {
+          continue;  // null slot: nothing to convert
+        }
+        if (decimalBatch.values[row] == 0) {
+          boolBatch.data[row] = 0;
+        } else {
+          boolBatch.data[row] = 1;
+        }
+      }
+    }
+  };
+
+  // Converts between decimal columns that differ in precision and/or scale.
+  //
+  // Template parameters:
+  //   FileTypeBatch - decimal batch type the file column is decoded into
+  //   ReadTypeBatch - decimal batch type handed to the caller
+  template <typename FileTypeBatch, typename ReadTypeBatch>
+  class DecimalConvertColumnReader : public ConvertColumnReader {
+   public:
+    // Caches the scale of the file type and the precision/scale of the read type.
+    DecimalConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe,
+                               bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow),
+          fromScale(static_cast<int32_t>(fileType.getScale())),
+          toPrecision(static_cast<int32_t>(_readType.getPrecision())),
+          toScale(static_cast<int32_t>(_readType.getScale())) {}
+
+    // Runs the base-class next() and then rescales every non-null slot of `data`.
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      for (uint64_t i = 0; i < numValues; ++i) {
+        // hasNulls is re-read on purpose: the conversion may report an overflow mid-loop.
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          convertDecimalToDecimal(dstBatch, i, srcBatch);
+        }
+      }
+    }
+
+   private:
+    // Rescales one value from fromScale to toPrecision/toScale. Overflow is reported through
+    // handleOverflow() exactly once and the slot is then left unwritten.
+    void convertDecimalToDecimal(ReadTypeBatch& dstBatch, uint64_t idx,
+                                 const FileTypeBatch& srcBatch) {
+      using FileType = decltype(srcBatch.values[idx]);
+      using ReadType = decltype(dstBatch.values[idx]);
+
+      auto [overflows, resultI128] =
+          convertDecimal(srcBatch.values[idx], fromScale, toPrecision, toScale);
+      if (overflows) {
+        handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+        // Stop here, like the sibling readers: otherwise the overflowed value would still be
+        // stored and, for Decimal64 targets, the overflow could be reported a second time.
+        return;
+      }
+      if constexpr (std::is_same_v<ReadTypeBatch, Decimal64VectorBatch>) {
+        // A Decimal64 batch stores 64-bit values, so the 128-bit result must fit.
+        if (!resultI128.fitsInLong()) {
+          handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+        } else {
+          dstBatch.values[idx] = resultI128.toLong();
+        }
+      } else {
+        dstBatch.values[idx] = resultI128;
+      }
+    }
+
+    int32_t fromScale;    // scale of the decimal file type
+    int32_t toPrecision;  // precision of the decimal read type
+    int32_t toScale;      // scale of the decimal read type
+  };
+
+#define DEFINE_NUMERIC_CONVERT_READER(FROM, TO, TYPE) \
+  using FROM##To##TO##ColumnReader =                  \
+      NumericConvertColumnReader<FROM##VectorBatch, TO##VectorBatch, TYPE>;  // alias <FROM>To<TO>ColumnReader
+
+#define DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(FROM, TO) \
+  using FROM##To##TO##ColumnReader = NumericToStringVariantColumnReader<FROM##VectorBatch>;  // TO only names the alias
+
+#define DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(FROM, IS_FROM_FLOATING)                       \
+  using FROM##To##Decimal64##ColumnReader =                                                    \
+      NumericToDecimalColumnReader<FROM##VectorBatch, Decimal64VectorBatch, IS_FROM_FLOATING>; \
+  using FROM##To##Decimal128##ColumnReader =                                                   \
+      NumericToDecimalColumnReader<FROM##VectorBatch, Decimal128VectorBatch, IS_FROM_FLOATING>;  // one alias per decimal width
+
+#define DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(FROM) \
+  using FROM##ToTimestampColumnReader = NumericToTimestampColumnReader<FROM##VectorBatch>;  // alias <FROM>ToTimestampColumnReader
+
+#define DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(TO, TYPE)                       \
+  using Decimal64##To##TO##ColumnReader =                                        \
+      DecimalToNumericColumnReader<Decimal64VectorBatch, TO##VectorBatch, TYPE>; \
+  using Decimal128##To##TO##ColumnReader =                                       \
+      DecimalToNumericColumnReader<Decimal128VectorBatch, TO##VectorBatch, TYPE>;  // one alias per source decimal width
+
+#define DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(TO)                     \
+  using Decimal64##To##TO##ColumnReader =                                \
+      DecimalConvertColumnReader<Decimal64VectorBatch, TO##VectorBatch>; \
+  using Decimal128##To##TO##ColumnReader =                               \
+      DecimalConvertColumnReader<Decimal128VectorBatch, TO##VectorBatch>;  // TO is Decimal64 or Decimal128
+
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Byte, int8_t)  // widening conversions start here
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Long, int64_t)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Long, int64_t)
+  DEFINE_NUMERIC_CONVERT_READER(Short, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Short, Long, int64_t)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Long, int64_t)
+  DEFINE_NUMERIC_CONVERT_READER(Float, Double, double)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Boolean, bool)  // narrowing conversions start here
+  DEFINE_NUMERIC_CONVERT_READER(Short, Boolean, bool)
+  DEFINE_NUMERIC_CONVERT_READER(Short, Byte, int8_t)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Boolean, bool)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Byte, int8_t)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Boolean, bool)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Byte, int8_t)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Float, float)
+  // Floating point to boolean/integer
+  DEFINE_NUMERIC_CONVERT_READER(Float, Boolean, bool)
+  DEFINE_NUMERIC_CONVERT_READER(Float, Byte, int8_t)
+  DEFINE_NUMERIC_CONVERT_READER(Float, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Float, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Float, Long, int64_t)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Boolean, bool)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Byte, int8_t)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Short, int16_t)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Int, int32_t)
+  DEFINE_NUMERIC_CONVERT_READER(Double, Long, int64_t)
+  // Boolean/integer to floating point
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Float, float)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Float, float)
+  DEFINE_NUMERIC_CONVERT_READER(Short, Float, float)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Float, float)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Float, float)
+  DEFINE_NUMERIC_CONVERT_READER(Boolean, Double, double)
+  DEFINE_NUMERIC_CONVERT_READER(Byte, Double, double)
+  DEFINE_NUMERIC_CONVERT_READER(Short, Double, double)
+  DEFINE_NUMERIC_CONVERT_READER(Int, Double, double)
+  DEFINE_NUMERIC_CONVERT_READER(Long, Double, double)
+
+  // Numeric to String/Char/Varchar (all three targets share one reader per source type)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Byte, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Short, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Int, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Long, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Float, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Double, String)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Byte, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Short, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Int, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Long, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Float, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Double, Char)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Byte, Varchar)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Short, Varchar)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Int, Varchar)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Long, Varchar)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Float, Varchar)
+  DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER(Double, Varchar)
+  using BooleanToStringColumnReader = BooleanToStringVariantColumnReader;  // booleans use a dedicated reader
+  using BooleanToCharColumnReader = BooleanToStringVariantColumnReader;
+  using BooleanToVarcharColumnReader = BooleanToStringVariantColumnReader;
+
+  // Numeric to Decimal (the flag marks floating-point source types)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Boolean, false)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Byte, false)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Short, false)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Int, false)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Long, false)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Float, true)
+  DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER(Double, true)
+
+  // Numeric to Timestamp
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Boolean)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Byte)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Short)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Int)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Long)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Float)
+  DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Double)
+
+  // Decimal to Numeric (the Boolean target resolves to the bool partial specialization)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Boolean, bool)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Byte, int8_t)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Short, int16_t)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Int, int32_t)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Long, int64_t)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Float, float)
+  DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Double, double)
+
+  // Decimal to Decimal (all four Decimal64/Decimal128 combinations)
+  DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal64)
+  DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal128)
+
+#define CREATE_READER(NAME) \
+  return std::make_unique<NAME>(_readType, fileType, stripe, throwOnOverflow);  // uses buildConvertReader's locals
+
+#define CASE_CREATE_READER(TYPE, CONVERT) \
+  case TYPE:                              \
+    CREATE_READER(CONVERT##ColumnReader)  // appends the ColumnReader suffix to CONVERT
+
+  const static int32_t MAX_PRECISION_64 = 18;  // largest precision for which isDecimal64() returns true
+
+  // Returns true when the precision of `type` lies in [1, MAX_PRECISION_64]; the dispatch
+  // macros use this to choose between the Decimal64 and Decimal128 reader flavours.
+  static inline bool isDecimal64(const Type& type) {
+    if (!(type.getPrecision() > 0)) {
+      return false;
+    }
+    return type.getPrecision() <= MAX_PRECISION_64;
+  }
+
+#define CASE_CREATE_FROM_DECIMAL_READER(TYPE, TO)   \
+  case TYPE: {                                      \
+    if (isDecimal64(fileType)) {                    \
+      CREATE_READER(Decimal64To##TO##ColumnReader)  \
+    } else {                                        \
+      CREATE_READER(Decimal128To##TO##ColumnReader) \
+    }                                               \
+  }  // source width is chosen from the file type's precision; both branches return
+
+#define CASE_CREATE_DECIMAL_READER(FROM)            \
+  case DECIMAL: {                                   \
+    if (isDecimal64(_readType)) {                   \
+      CREATE_READER(FROM##ToDecimal64ColumnReader)  \
+    } else {                                        \
+      CREATE_READER(FROM##ToDecimal128ColumnReader) \
+    }                                               \
+  }  // target width is chosen from the read type's precision; both branches return
+
+#define CASE_EXCEPTION                                                                 \
+  default:                                                                             \
+    throw SchemaEvolutionError("Cannot convert from " + fileType.toString() + " to " + \
+                               _readType.toString());  // expands to the default: label of the enclosing switch
+
+  // Creates the reader that converts a column stored as `fileType` into the read type that the
+  // stripe's SchemaEvolution reports for it.
+  //
+  // fileType              - type of the column as written in the file
+  // stripe                - supplies the SchemaEvolution and is forwarded to the created reader
+  // useTightNumericVector - must be true; conversion readers only support tight vectors
+  // throwOnOverflow       - forwarded to the created reader
+  //
+  // Throws SchemaEvolutionError when useTightNumericVector is false or when no conversion from
+  // fileType to the read type is implemented. Only boolean, integer, floating-point and decimal
+  // file types have conversions; every other file type is rejected.
+  std::unique_ptr<ColumnReader> buildConvertReader(const Type& fileType, StripeStreams& stripe,
+                                                   bool useTightNumericVector,
+                                                   bool throwOnOverflow) {
+    if (!useTightNumericVector) {
+      throw SchemaEvolutionError(
+          "SchemaEvolution only support tight vector, please create ColumnVectorBatch with "
+          "option useTightNumericVector");
+    }
+    const auto& _readType = *stripe.getSchemaEvolution()->getReadType(fileType);
+
+    // Every inner switch either returns a reader or throws through CASE_EXCEPTION, so control
+    // never falls from one outer case into the next.
+    switch (fileType.getKind()) {
+      case BOOLEAN: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BYTE, BooleanToByte)
+          CASE_CREATE_READER(SHORT, BooleanToShort)
+          CASE_CREATE_READER(INT, BooleanToInt)
+          CASE_CREATE_READER(LONG, BooleanToLong)
+          CASE_CREATE_READER(FLOAT, BooleanToFloat)
+          CASE_CREATE_READER(DOUBLE, BooleanToDouble)
+          CASE_CREATE_READER(STRING, BooleanToString)
+          CASE_CREATE_READER(CHAR, BooleanToChar)
+          CASE_CREATE_READER(VARCHAR, BooleanToVarchar)
+          CASE_CREATE_DECIMAL_READER(Boolean)
+          CASE_CREATE_READER(TIMESTAMP, BooleanToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, BooleanToTimestamp)
+          case BOOLEAN:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case BYTE: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, ByteToBoolean)
+          CASE_CREATE_READER(SHORT, ByteToShort)
+          CASE_CREATE_READER(INT, ByteToInt)
+          CASE_CREATE_READER(LONG, ByteToLong)
+          CASE_CREATE_READER(FLOAT, ByteToFloat)
+          CASE_CREATE_READER(DOUBLE, ByteToDouble)
+          CASE_CREATE_READER(STRING, ByteToString)
+          CASE_CREATE_READER(CHAR, ByteToChar)
+          CASE_CREATE_READER(VARCHAR, ByteToVarchar)
+          CASE_CREATE_DECIMAL_READER(Byte)
+          CASE_CREATE_READER(TIMESTAMP, ByteToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, ByteToTimestamp)
+          case BYTE:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case SHORT: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, ShortToBoolean)
+          CASE_CREATE_READER(BYTE, ShortToByte)
+          CASE_CREATE_READER(INT, ShortToInt)
+          CASE_CREATE_READER(LONG, ShortToLong)
+          CASE_CREATE_READER(FLOAT, ShortToFloat)
+          CASE_CREATE_READER(DOUBLE, ShortToDouble)
+          CASE_CREATE_READER(STRING, ShortToString)
+          CASE_CREATE_READER(CHAR, ShortToChar)
+          CASE_CREATE_READER(VARCHAR, ShortToVarchar)
+          CASE_CREATE_DECIMAL_READER(Short)
+          CASE_CREATE_READER(TIMESTAMP, ShortToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, ShortToTimestamp)
+          case SHORT:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case INT: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, IntToBoolean)
+          CASE_CREATE_READER(BYTE, IntToByte)
+          CASE_CREATE_READER(SHORT, IntToShort)
+          CASE_CREATE_READER(LONG, IntToLong)
+          CASE_CREATE_READER(FLOAT, IntToFloat)
+          CASE_CREATE_READER(DOUBLE, IntToDouble)
+          CASE_CREATE_READER(STRING, IntToString)
+          CASE_CREATE_READER(CHAR, IntToChar)
+          CASE_CREATE_READER(VARCHAR, IntToVarchar)
+          CASE_CREATE_DECIMAL_READER(Int)
+          CASE_CREATE_READER(TIMESTAMP, IntToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, IntToTimestamp)
+          case INT:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case LONG: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, LongToBoolean)
+          CASE_CREATE_READER(BYTE, LongToByte)
+          CASE_CREATE_READER(SHORT, LongToShort)
+          CASE_CREATE_READER(INT, LongToInt)
+          CASE_CREATE_READER(FLOAT, LongToFloat)
+          CASE_CREATE_READER(DOUBLE, LongToDouble)
+          CASE_CREATE_READER(STRING, LongToString)
+          CASE_CREATE_READER(CHAR, LongToChar)
+          CASE_CREATE_READER(VARCHAR, LongToVarchar)
+          CASE_CREATE_DECIMAL_READER(Long)
+          CASE_CREATE_READER(TIMESTAMP, LongToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, LongToTimestamp)
+          case LONG:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case FLOAT: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, FloatToBoolean)
+          CASE_CREATE_READER(BYTE, FloatToByte)
+          CASE_CREATE_READER(SHORT, FloatToShort)
+          CASE_CREATE_READER(INT, FloatToInt)
+          CASE_CREATE_READER(LONG, FloatToLong)
+          CASE_CREATE_READER(DOUBLE, FloatToDouble)
+          CASE_CREATE_READER(STRING, FloatToString)
+          CASE_CREATE_READER(CHAR, FloatToChar)
+          CASE_CREATE_READER(VARCHAR, FloatToVarchar)
+          CASE_CREATE_DECIMAL_READER(Float)
+          CASE_CREATE_READER(TIMESTAMP, FloatToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, FloatToTimestamp)
+          case FLOAT:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case DOUBLE: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_READER(BOOLEAN, DoubleToBoolean)
+          CASE_CREATE_READER(BYTE, DoubleToByte)
+          CASE_CREATE_READER(SHORT, DoubleToShort)
+          CASE_CREATE_READER(INT, DoubleToInt)
+          CASE_CREATE_READER(LONG, DoubleToLong)
+          CASE_CREATE_READER(FLOAT, DoubleToFloat)
+          CASE_CREATE_READER(STRING, DoubleToString)
+          CASE_CREATE_READER(CHAR, DoubleToChar)
+          CASE_CREATE_READER(VARCHAR, DoubleToVarchar)
+          CASE_CREATE_DECIMAL_READER(Double)
+          CASE_CREATE_READER(TIMESTAMP, DoubleToTimestamp)
+          CASE_CREATE_READER(TIMESTAMP_INSTANT, DoubleToTimestamp)
+          case DOUBLE:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      case DECIMAL: {
+        switch (_readType.getKind()) {
+          CASE_CREATE_FROM_DECIMAL_READER(BOOLEAN, Boolean)
+          CASE_CREATE_FROM_DECIMAL_READER(BYTE, Byte)
+          CASE_CREATE_FROM_DECIMAL_READER(SHORT, Short)
+          CASE_CREATE_FROM_DECIMAL_READER(INT, Int)
+          CASE_CREATE_FROM_DECIMAL_READER(LONG, Long)
+          CASE_CREATE_FROM_DECIMAL_READER(FLOAT, Float)
+          CASE_CREATE_FROM_DECIMAL_READER(DOUBLE, Double)
+          case DECIMAL: {
+            if (isDecimal64(fileType)) {
+              if (isDecimal64(_readType)) {
+                CREATE_READER(Decimal64ToDecimal64ColumnReader)
+              } else {
+                CREATE_READER(Decimal64ToDecimal128ColumnReader)
+              }
+            } else {
+              if (isDecimal64(_readType)) {
+                CREATE_READER(Decimal128ToDecimal64ColumnReader)
+              } else {
+                CREATE_READER(Decimal128ToDecimal128ColumnReader)
+              }
+            }
+          }
+          case STRING:
+          case CHAR:
+          case VARCHAR:
+          case TIMESTAMP:
+          case TIMESTAMP_INSTANT:
+          case BINARY:
+          case LIST:
+          case MAP:
+          case STRUCT:
+          case UNION:
+          case DATE:
+            CASE_EXCEPTION
+        }
+      }
+      // File types without any conversion reader. These labels must not share the DECIMAL
+      // block above, otherwise e.g. a STRING column would be handed to a decimal reader.
+      case STRING:
+      case BINARY:
+      case TIMESTAMP:
+      case LIST:
+      case MAP:
+      case STRUCT:
+      case UNION:
+      case DATE:
+      case VARCHAR:
+      case CHAR:
+      case TIMESTAMP_INSTANT:
+        CASE_EXCEPTION
+    }
+  }
+
+// Drop every helper macro defined above so none of them leaks past this point.
+#undef DEFINE_NUMERIC_CONVERT_READER
+#undef DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER
+#undef DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER
+#undef DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER
+#undef DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER
+#undef DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER
+#undef CASE_CREATE_FROM_DECIMAL_READER
+#undef CASE_CREATE_DECIMAL_READER
+#undef CASE_CREATE_READER
+#undef CREATE_READER
+#undef CASE_EXCEPTION
+
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/ConvertColumnReader.hh b/contrib/libs/apache/orc/c++/src/ConvertColumnReader.hh
new file mode 100644
index 0000000000..6ed4d0170d
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/ConvertColumnReader.hh
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_CONVERT_COLUMN_READER_HH
+#define ORC_CONVERT_COLUMN_READER_HH
+
+#include "ColumnReader.hh"
+#include "SchemaEvolution.hh"
+
+namespace orc {
+
+  class ConvertColumnReader : public ColumnReader {  // common base of the type-converting readers
+   public:
+    ConvertColumnReader(const Type& readType, const Type& fileType, StripeStreams& stripe,
+                        bool throwOnOverflow);
+
+    // Converting subclasses override next(): they call this version first, then convert `data` into rowBatch.
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override;
+
+    uint64_t skip(uint64_t numValues) override;
+
+    void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& positions) override;
+
+   protected:
+    bool useTightNumericVector;  // NOTE(review): meaning not visible from this header; see the constructor definition
+    const Type& readType;  // type handed to the caller; held by reference, so it must outlive this reader
+    std::unique_ptr<ColumnReader> reader;  // presumably the reader for the file type; confirm in the .cc
+    std::unique_ptr<ColumnVectorBatch> data;  // batch that subclasses cast to the file-type batch and convert from
+    const bool throwOnOverflow;  // passed to handleOverflow() by the converting subclasses
+  };
+
+  std::unique_ptr<ColumnReader> buildConvertReader(const Type& fileType, StripeStreams& stripe,
+                                                   bool useTightNumericVector,
+                                                   bool throwOnOverflow);  // throws SchemaEvolutionError for unsupported conversions or non-tight vectors
+
+}  // namespace orc
+
+#endif  // ORC_CONVERT_COLUMN_READER_HH
diff --git a/contrib/libs/apache/orc/c++/src/CpuInfoUtil.cc b/contrib/libs/apache/orc/c++/src/CpuInfoUtil.cc
new file mode 100644
index 0000000000..7e6958deef
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/CpuInfoUtil.cc
@@ -0,0 +1,589 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CpuInfoUtil.cc is from Apache Arrow as of 2023-03-21
+ */
+
+#include "CpuInfoUtil.hh"
+
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#endif
+
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+
+#ifdef _WIN32
+#define NOMINMAX
+#include <Windows.h>
+#include <intrin.h>
+#endif
+
+#include <algorithm>
+#include <array>
+#include <bitset>
+#include <cstdint>
+#include <fstream>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "orc/Exceptions.hh"
+
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
+
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#define CPUINFO_ARCH_X86
+#ifndef ORC_HAVE_RUNTIME_AVX512
+#define UNUSED(x) (void)(x)
+#endif
+#elif defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
+#define CPUINFO_ARCH_ARM
+#elif defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__)
+#define CPUINFO_ARCH_PPC
+#endif
+
+namespace orc {
+
+  namespace {
+
+    // Number of cache levels tracked: one slot per CpuInfo::CacheLevel enumerator,
+    // from the first level up to and including CacheLevel::Last.
+    constexpr int kCacheLevels = static_cast<int>(CpuInfo::CacheLevel::Last) + 1;
+
+    //============================== OS Dependent ==============================//
+
+#if defined(_WIN32)
+    //------------------------------ WINDOWS ------------------------------//
+    /**
+     * Fills cache_sizes with the largest cache size Windows reports for each cache
+     * level, using GetLogicalProcessorInformation() looked up at runtime in kernel32.
+     * Slots for levels that are not reported are left untouched.
+     *
+     * @param cache_sizes array indexed by (cache level - 1); updated in place
+     * @throws ParseError if the API cannot be located or one of the queries fails
+     */
+    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
+      DWORD buffer_size = 0;
+      size_t offset = 0;
+      typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*);
+      GetLogicalProcessorInformationFuncPointer func_pointer =
+          (GetLogicalProcessorInformationFuncPointer)GetProcAddress(
+              GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
+
+      if (!func_pointer) {
+        throw ParseError("Failed to find procedure GetLogicalProcessorInformation");
+      }
+
+      // Get buffer size. This probe passes no buffer, so it is expected to fail with
+      // ERROR_INSUFFICIENT_BUFFER while storing the required size in buffer_size.
+      // Only a failure with a different error code is a real error.
+      if (!func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        throw ParseError("Failed to get size of processor information buffer");
+      }
+
+      buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
+      if (!buffer) {
+        // Best effort: without a buffer the cache sizes simply stay as they were.
+        return;
+      }
+
+      if (!func_pointer(buffer, &buffer_size)) {
+        free(buffer);
+        throw ParseError("Failed to get processor information");
+      }
+
+      // Walk the returned records and keep the maximum size seen per cache level.
+      buffer_position = buffer;
+      while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) {
+        if (RelationCache == buffer_position->Relationship) {
+          PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
+          if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
+            const int64_t current = (*cache_sizes)[cache->Level - 1];
+            (*cache_sizes)[cache->Level - 1] = std::max<int64_t>(current, cache->Size);
+          }
+        }
+        offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+        buffer_position++;
+      }
+
+      free(buffer);
+    }
+
+#if defined(CPUINFO_ARCH_X86)
+    // On x86, get CPU features by cpuid, https://en.wikipedia.org/wiki/CPUID
+
+#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
+    // Local replacement for the __cpuidex intrinsic, compiled only under the enclosing
+    // MinGW-w64 version check. Executes CPUID with EAX = function_id and
+    // ECX = subfunction_id and stores the resulting EAX, EBX, ECX, EDX in CPUInfo[0..3].
+    void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
+      __asm__ __volatile__("cpuid"
+                           : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]), "=d"(CPUInfo[3])
+                           : "a"(function_id), "c"(subfunction_id));
+    }
+
+    // Local replacement for the _xgetbv intrinsic: executes XGETBV for the extended
+    // control register selected by xcr. Only the low 32 bits of the result (EAX) are
+    // returned; EDX is declared as clobbered and its contents are discarded.
+    int64_t _xgetbv(int xcr) {
+      int out = 0;
+      __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
+      return out;
+    }
+#endif  // MINGW
+
+    /**
+     * Detects the CPU vendor, model name and SIMD feature bits through CPUID.
+     *
+     * @param hardware_flags detected CpuInfo feature bits are OR-ed into this value
+     * @param vendor set to Intel or AMD when the vendor string matches; otherwise untouched
+     * @param model_name replaced by the processor brand string when the CPU exposes it
+     */
+    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                           std::string* model_name) {
+      int register_EAX_id = 1;
+      int highest_valid_id = 0;
+      std::bitset<32> features_ECX;
+      std::array<int, 4> cpu_info;
+
+      // Get highest valid id
+      __cpuid(cpu_info.data(), 0);
+      highest_valid_id = cpu_info[0];
+      // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
+      // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
+      if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 && cpu_info[2] == 0x6c65746e) {
+        *vendor = CpuInfo::Vendor::Intel;
+      } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
+                 cpu_info[2] == 0x444d4163) {
+        *vendor = CpuInfo::Vendor::AMD;
+      }
+
+      // Leaf N may be queried whenever N <= highest_valid_id, so only give up when
+      // even leaf 1 is unavailable.
+      if (highest_valid_id < register_EAX_id) {
+        return;
+      }
+
+      // EAX=1: Processor Info and Feature Bits
+      __cpuidex(cpu_info.data(), register_EAX_id, 0);
+      features_ECX = cpu_info[2];
+
+      // Get highest extended id. Extended leaf numbers have the top bit set, so they
+      // must be compared as unsigned values: as signed ints, a small positive EAX
+      // (CPU without extended leaves) would compare greater than 0x80000004.
+      __cpuid(cpu_info.data(), 0x80000000);
+      const uint32_t highest_extended_valid_id = static_cast<uint32_t>(cpu_info[0]);
+
+      // Retrieve CPU model name: leaves 0x80000002..0x80000004 each contribute the
+      // 16 bytes held in EAX, EBX, ECX, EDX.
+      constexpr uint32_t kFirstBrandLeaf = 0x80000002u;
+      constexpr uint32_t kLastBrandLeaf = 0x80000004u;
+      if (highest_extended_valid_id >= kLastBrandLeaf) {
+        model_name->clear();
+        for (uint32_t leaf = kFirstBrandLeaf; leaf <= kLastBrandLeaf; ++leaf) {
+          __cpuidex(cpu_info.data(), static_cast<int>(leaf), 0);
+          *model_name += std::string(reinterpret_cast<char*>(cpu_info.data()), sizeof(cpu_info));
+        }
+      }
+
+      bool zmm_enabled = false;
+      if (features_ECX[27]) {  // OSXSAVE
+        // Query if the OS supports saving ZMM registers when switching contexts
+        int64_t xcr0 = _xgetbv(0);
+        zmm_enabled = (xcr0 & 0xE0) == 0xE0;
+      }
+
+      if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
+      if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
+      if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
+      if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
+      if (features_ECX[28]) *hardware_flags |= CpuInfo::AVX;
+
+      // cpuid with EAX=7, ECX=0: Extended Features (valid when 7 <= highest_valid_id)
+      register_EAX_id = 7;
+      if (highest_valid_id >= register_EAX_id) {
+        __cpuidex(cpu_info.data(), register_EAX_id, 0);
+        std::bitset<32> features_EBX = cpu_info[1];
+
+        if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
+        if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
+        if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
+        // AVX-512 bits are only reported when the OS preserves the ZMM state.
+        if (zmm_enabled) {
+          if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
+          if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
+          if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
+          if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
+          if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
+        }
+      }
+    }
+
+#elif defined(CPUINFO_ARCH_ARM)
+    // Windows on Arm: no runtime probing is done here. ASIMD is reported
+    // unconditionally; vendor and model_name keep whatever values the caller
+    // initialised them with.
+    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                           std::string* model_name) {
+      *hardware_flags |= CpuInfo::ASIMD;
+      // TODO: vendor, model_name
+    }
+#endif
+
+#elif defined(__APPLE__)
+    //------------------------------ MACOS ------------------------------//
+    // Reads an integer-valued sysctl entry by name.
+    // Returns the value on success, std::nullopt when the entry is unavailable
+    // (errno is ENOENT, EINVAL or ENOTSUP), and throws ParseError for any other failure.
+    std::optional<int64_t> IntegerSysCtlByName(const char* name) {
+      int64_t value = 0;
+      size_t value_size = sizeof(int64_t);
+      if (sysctlbyname(name, &value, &value_size, nullptr, 0) == 0) {
+        return value;
+      }
+      // ENOENT is the official errno value for non-existing sysctl's,
+      // but EINVAL and ENOTSUP have been seen in the wild.
+      const bool not_available = errno == ENOENT || errno == EINVAL || errno == ENOTSUP;
+      if (not_available) {
+        return std::nullopt;
+      }
+      std::ostringstream message;
+      message << "sysctlbyname failed for '" << name << "'";
+      throw ParseError(message.str());
+    }
+
+    // Queries the L1 data, L2 and L3 cache sizes through sysctl and stores each one
+    // that is available in cache_sizes[0..2]; unavailable levels are left untouched.
+    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+      static_assert(kCacheLevels >= 3, "");
+      const char* const kSysCtlNames[] = {"hw.l1dcachesize", "hw.l2cachesize", "hw.l3cachesize"};
+      size_t level = 0;
+      for (const char* sysctl_name : kSysCtlNames) {
+        const auto size = IntegerSysCtlByName(sysctl_name);
+        if (size) {
+          (*cache_sizes)[level] = *size;
+        }
+        ++level;
+      }
+    }
+
+    // macOS: derives the feature bits from the hw.optional.* sysctl entries. Vendor and
+    // model name are not detected yet and are reported as unknown.
+    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                           std::string* model_name) {
+      // Pairs a sysctl entry with the CpuInfo bits to set when it reads non-zero.
+      struct SysCtlCpuFeature {
+        const char* name;
+        int64_t flag;
+      };
+      const std::vector<SysCtlCpuFeature> kFeatureTable = {
+#if defined(CPUINFO_ARCH_X86)
+        {"hw.optional.sse4_2",
+         CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
+        {"hw.optional.avx1_0", CpuInfo::AVX},
+        {"hw.optional.avx2_0", CpuInfo::AVX2},
+        {"hw.optional.bmi1", CpuInfo::BMI1},
+        {"hw.optional.bmi2", CpuInfo::BMI2},
+        {"hw.optional.avx512f", CpuInfo::AVX512F},
+        {"hw.optional.avx512cd", CpuInfo::AVX512CD},
+        {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
+        {"hw.optional.avx512bw", CpuInfo::AVX512BW},
+        {"hw.optional.avx512vl", CpuInfo::AVX512VL},
+#elif defined(CPUINFO_ARCH_ARM)
+        // ARM64 (note that this is exposed under Rosetta as well)
+        {"hw.optional.neon", CpuInfo::ASIMD},
+#endif
+      };
+      for (const SysCtlCpuFeature& entry : kFeatureTable) {
+        // A missing sysctl entry counts as "feature absent".
+        const int64_t enabled = IntegerSysCtlByName(entry.name).value_or(0);
+        if (enabled != 0) {
+          *hardware_flags |= entry.flag;
+        }
+      }
+
+      // TODO: vendor, model_name
+      *model_name = "Unknown";
+      *vendor = CpuInfo::Vendor::Unknown;
+    }
+
+#else
+    //------------------------------ LINUX ------------------------------//
+    // Returns the size of the cache selected by `level` (an index into the tables
+    // below), or 0 when it cannot be determined. sysconf() is tried first when the
+    // platform defines the cache-size names; sysfs is the fallback.
+    int64_t LinuxGetCacheSize(int level) {
+#ifdef _SC_LEVEL1_DCACHE_SIZE
+      const int kCacheSizeConf[] = {
+          _SC_LEVEL1_DCACHE_SIZE,
+          _SC_LEVEL2_CACHE_SIZE,
+          _SC_LEVEL3_CACHE_SIZE,
+      };
+      static_assert(sizeof(kCacheSizeConf) / sizeof(kCacheSizeConf[0]) == kCacheLevels, "");
+
+      // errno is cleared first so that a stale value is not mistaken for a failure.
+      errno = 0;
+      const int64_t from_sysconf = sysconf(kCacheSizeConf[level]);
+      if (errno == 0 && from_sysconf > 0) {
+        return from_sysconf;
+      }
+#endif
+
+      // sysconf() failed or is not supported: read the size from sysfs instead
+      const char* kCacheSizeSysfs[] = {
+          "/sys/devices/system/cpu/cpu0/cache/index0/size",  // l1d (index1 is l1i)
+          "/sys/devices/system/cpu/cpu0/cache/index2/size",  // l2
+          "/sys/devices/system/cpu/cpu0/cache/index3/size",  // l3
+      };
+      static_assert(sizeof(kCacheSizeSysfs) / sizeof(kCacheSizeSysfs[0]) == kCacheLevels, "");
+
+      std::ifstream sysfs_entry(kCacheSizeSysfs[level], std::ios::in);
+      if (!sysfs_entry) {
+        return 0;
+      }
+      // The entry is one line like: 65536, 64K, 1M, etc.
+      uint64_t amount = 0;
+      char suffix = '\0';
+      sysfs_entry >> amount >> suffix;
+      switch (suffix) {
+        case '\0':  // no unit suffix
+          break;
+        case 'K':
+          amount <<= 10;
+          break;
+        case 'M':
+          amount <<= 20;
+          break;
+        case 'G':
+          amount <<= 30;
+          break;
+        default:  // unknown suffix
+          return 0;
+      }
+      return static_cast<int64_t>(amount);
+    }
+
+    // Translates the flag list of a /proc/cpuinfo entry into CpuInfo feature bits.
+    // `values` holds the space-separated flag names; every known name that occurs
+    // anywhere inside it (substring search) contributes its bit.
+    // Returns the OR of all matched bits.
+    int64_t LinuxParseCpuFlags(const std::string& values) {
+      const struct {
+        std::string name;
+        int64_t flag;
+      } flag_mappings[] = {
+#if defined(CPUINFO_ARCH_X86)
+        {"ssse3", CpuInfo::SSSE3},
+        {"sse4_1", CpuInfo::SSE4_1},
+        {"sse4_2", CpuInfo::SSE4_2},
+        {"popcnt", CpuInfo::POPCNT},
+        {"avx", CpuInfo::AVX},
+        {"avx2", CpuInfo::AVX2},
+        {"avx512f", CpuInfo::AVX512F},
+        {"avx512cd", CpuInfo::AVX512CD},
+        {"avx512vl", CpuInfo::AVX512VL},
+        {"avx512dq", CpuInfo::AVX512DQ},
+        {"avx512bw", CpuInfo::AVX512BW},
+        {"bmi1", CpuInfo::BMI1},
+        {"bmi2", CpuInfo::BMI2},
+#elif defined(CPUINFO_ARCH_ARM)
+        {"asimd", CpuInfo::ASIMD},
+#endif
+      };
+      const int64_t mapping_count = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
+
+      int64_t detected = 0;
+      for (int idx = 0; idx < mapping_count; ++idx) {
+        const auto& mapping = flag_mappings[idx];
+        const bool present = values.find(mapping.name) != std::string::npos;
+        if (present) {
+          detected |= mapping.flag;
+        }
+      }
+      return detected;
+    }
+
+    // Linux: stores every cache size that LinuxGetCacheSize() can determine; levels
+    // for which it returns 0 keep their previous value.
+    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+      for (int level = 0; level != kCacheLevels; ++level) {
+        const int64_t detected = LinuxGetCacheSize(level);
+        if (detected <= 0) {
+          continue;
+        }
+        (*cache_sizes)[level] = detected;
+      }
+    }
+
+    // True for the two characters treated as padding here: space and horizontal tab.
+    static constexpr bool IsWhitespace(char c) {
+      switch (c) {
+        case ' ':
+        case '\t':
+          return true;
+        default:
+          return false;
+      }
+    }
+
+    // Returns `value` without leading and trailing spaces/tabs (see IsWhitespace).
+    // Other characters, including newlines, are kept.
+    std::string TrimString(std::string value) {
+      // Drop the leading run first ...
+      const auto head = std::find_if_not(value.begin(), value.end(), IsWhitespace);
+      value.erase(value.begin(), head);
+      // ... then the trailing run; base() points just past the last kept character.
+      const auto tail = std::find_if_not(value.rbegin(), value.rend(), IsWhitespace).base();
+      value.erase(tail, value.end());
+      return value;
+    }
+
+    /**
+     * Read from /proc/cpuinfo.
+     *
+     * Every "key : value" line is split at its first colon. The "flags" / "Features"
+     * entries are translated into feature bits, "model name" is copied verbatim and
+     * "vendor_id" is mapped to Intel or AMD when it matches. If the file cannot be
+     * opened, nothing is modified.
+     *
+     * @param hardware_flags detected CpuInfo feature bits are OR-ed into this value
+     * @param vendor set only when vendor_id is GenuineIntel or AuthenticAMD
+     * @param model_name overwritten by each "model name" entry encountered
+     */
+    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                           std::string* model_name) {
+      std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
+      std::string line;
+      while (std::getline(cpuinfo, line)) {
+        const size_t colon = line.find(':');
+        if (colon == std::string::npos) {
+          continue;
+        }
+        // Take everything before the colon and let TrimString() remove the padding.
+        // (Cutting at colon - 1 would drop the last character of an unpadded key and
+        // wrap around to npos for a line that starts with ':'.)
+        const std::string name = TrimString(line.substr(0, colon));
+        const std::string value = TrimString(line.substr(colon + 1, std::string::npos));
+        if (name.compare("flags") == 0 || name.compare("Features") == 0) {
+          *hardware_flags |= LinuxParseCpuFlags(value);
+        } else if (name.compare("model name") == 0) {
+          *model_name = value;
+        } else if (name.compare("vendor_id") == 0) {
+          if (value.compare("GenuineIntel") == 0) {
+            *vendor = CpuInfo::Vendor::Intel;
+          } else if (value.compare("AuthenticAMD") == 0) {
+            *vendor = CpuInfo::Vendor::AMD;
+          }
+        }
+      }
+    }
+#endif  // WINDOWS, MACOS, LINUX
+
+    //============================== Arch Dependent ==============================//
+
+#if defined(CPUINFO_ARCH_X86)
+    //------------------------------ X86_64 ------------------------------//
+    // Applies the user-selected SIMD level on x86.
+    //   "AVX512" - accepted, hardware_flags is left as detected
+    //   "NONE"   - accepted, all CpuInfo::AVX512 bits are cleared from hardware_flags
+    // Any other value is rejected (returns false) without touching hardware_flags.
+    bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+      if (simd_level == "AVX512") {
+        return true;
+      }
+      if (simd_level != "NONE") {
+        return false;
+      }
+
+      // Disable feature as the level
+      *hardware_flags &= ~CpuInfo::AVX512;
+      return true;
+    }
+
+    // x86: when the library is built with ORC_HAVE_RUNTIME_AVX512, throws ParseError
+    // unless the CPU was detected to provide the complete CpuInfo::AVX512 feature set.
+    // Without that macro there is nothing to verify and `ci` is only marked as used.
+    void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+#if defined(ORC_HAVE_RUNTIME_AVX512)
+      if (!ci->isDetected(CpuInfo::AVX512)) {
+        throw ParseError("CPU does not support the Supplemental AVX512 instruction set");
+      }
+#else
+      UNUSED(ci);
+#endif
+    }
+
+#elif defined(CPUINFO_ARCH_ARM)
+    //------------------------------ AARCH64 ------------------------------//
+    // AArch64: "NONE" is the only accepted level and clears CpuInfo::ASIMD from
+    // hardware_flags; every other value is rejected by returning false.
+    bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+      if (simd_level != "NONE") {
+        return false;
+      }
+      *hardware_flags &= ~CpuInfo::ASIMD;
+      return true;
+    }
+
+    // AArch64: throws ParseError unless ASIMD was detected on this CPU.
+    void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+      const bool has_asimd = ci->isDetected(CpuInfo::ASIMD);
+      if (has_asimd) {
+        return;
+      }
+      throw ParseError("CPU does not support the Armv8 Neon instruction set");
+    }
+
+#else
+    //------------------------------ PPC, ... ------------------------------//
+    // Fallback for architectures other than x86 and AArch64: every simd_level is
+    // accepted and hardware_flags is left unchanged.
+    bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+      return true;
+    }
+
+    // Fallback for architectures other than x86 and AArch64: nothing is verified.
+    void ArchVerifyCpuRequirements(const CpuInfo* ci) {}
+
+#endif  // X86, ARM, PPC
+
+  }  // namespace
+
+  // State behind CpuInfo. Everything is detected once, in the constructor.
+  struct CpuInfo::Impl {
+    // Feature bits after applying ORC_USER_SIMD_LEVEL.
+    int64_t hardware_flags = 0;
+    int numCores = 0;
+    // Feature bits exactly as detected, before any user override.
+    int64_t original_hardware_flags = 0;
+    Vendor vendor = Vendor::Unknown;
+    std::string model_name = "Unknown";
+    std::array<int64_t, kCacheLevels> cache_sizes{};
+
+    // Runs the OS-specific detection, then applies the SIMD level requested through
+    // the ORC_USER_SIMD_LEVEL environment variable (case-insensitive; treated as
+    // "NONE" when unset). Throws ParseError if the level is not accepted.
+    Impl() {
+      OsRetrieveCacheSize(&cache_sizes);
+      OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
+      original_hardware_flags = hardware_flags;
+
+      // hardware_concurrency() may report 0; always claim at least one core.
+      const int reported_cores = static_cast<int>(std::thread::hardware_concurrency());
+      numCores = reported_cores > 1 ? reported_cores : 1;
+
+      // parse user simd level
+      const char* const env_value = std::getenv("ORC_USER_SIMD_LEVEL");
+      std::string userSimdLevel = "NONE";
+      if (env_value != nullptr) {
+        userSimdLevel = env_value;
+      }
+      for (char& ch : userSimdLevel) {
+        ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
+      }
+      if (!ArchParseUserSimdLevel(userSimdLevel, &hardware_flags)) {
+        throw ParseError("Invalid value for ORC_USER_SIMD_LEVEL: " + userSimdLevel);
+      }
+    }
+  };
+
+  // Defined in this file rather than in the header: Impl is only forward-declared
+  // there, and destroying the unique_ptr<Impl> member needs the complete type.
+  CpuInfo::~CpuInfo() = default;
+
+  // Constructing Impl performs the whole CPU detection and may throw ParseError.
+  CpuInfo::CpuInfo() : impl_(new Impl) {}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wexit-time-destructors"
+#endif
+
+  // Returns the shared CpuInfo object. It is a function-local static, so it is
+  // constructed (and the CPU probed) on the first call; later calls return the same
+  // pointer.
+  const CpuInfo* CpuInfo::getInstance() {
+    static CpuInfo cpu_info;
+    return &cpu_info;
+  }
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+  // Returns the feature bits that remain enabled after the ORC_USER_SIMD_LEVEL
+  // override has been applied (not the raw detected set).
+  int64_t CpuInfo::hardwareFlags() const {
+    return impl_->hardware_flags;
+  }
+
+  // Returns the detected core count, never less than 1.
+  int CpuInfo::numCores() const {
+    return std::max(impl_->numCores, 1);
+  }
+
+  // Returns the detected CPU vendor; Vendor::Unknown when it was not recognised.
+  CpuInfo::Vendor CpuInfo::vendor() const {
+    return impl_->vendor;
+  }
+
+  // Returns the detected CPU model name ("Unknown" when none was found). The
+  // reference points into this CpuInfo and stays valid as long as it does.
+  const std::string& CpuInfo::modelName() const {
+    return impl_->model_name;
+  }
+
+  // Returns the size of the requested cache level. A detected size wins; otherwise
+  // L1 falls back to its built-in default, and L2/L3 fall back to the larger of
+  // their built-in default and the detected size of the level below.
+  int64_t CpuInfo::cacheSize(CacheLevel level) const {
+    constexpr int64_t kDefaultCacheSizes[] = {
+        32 * 1024,    // Level 1: 32K
+        256 * 1024,   // Level 2: 256K
+        3072 * 1024,  // Level 3: 3M
+    };
+    static_assert(sizeof(kDefaultCacheSizes) / sizeof(kDefaultCacheSizes[0]) == kCacheLevels, "");
+    static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
+
+    const auto& detected = impl_->cache_sizes;
+    const int idx = static_cast<int>(level);
+    if (detected[idx] > 0) {
+      return detected[idx];
+    }
+    if (idx == 0) {
+      return kDefaultCacheSizes[0];
+    }
+    // l3 may be not available, return maximum of l2 or default size
+    return std::max(kDefaultCacheSizes[idx], detected[idx - 1]);
+  }
+
+  // True when every bit in `flags` is set in the enabled feature set, i.e. the
+  // detected features minus those disabled through ORC_USER_SIMD_LEVEL.
+  bool CpuInfo::isSupported(int64_t flags) const {
+    return (impl_->hardware_flags & flags) == flags;
+  }
+
+  // True when every bit in `flags` was detected on the CPU, regardless of any user
+  // override.
+  bool CpuInfo::isDetected(int64_t flags) const {
+    return (impl_->original_hardware_flags & flags) == flags;
+  }
+
+  // Delegates to the architecture-specific check, which throws ParseError when a
+  // required instruction set is missing.
+  void CpuInfo::verifyCpuRequirements() const {
+    ArchVerifyCpuRequirements(this);
+  }
+
+}  // namespace orc
+
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
diff --git a/contrib/libs/apache/orc/c++/src/CpuInfoUtil.hh b/contrib/libs/apache/orc/c++/src/CpuInfoUtil.hh
new file mode 100644
index 0000000000..5637053e6d
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/CpuInfoUtil.hh
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CpuInfoUtil.hh is from Apache Arrow as of 2023-03-21
+ */
+
+#ifndef ORC_CPUINFOUTIL_HH
+#define ORC_CPUINFOUTIL_HH
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+namespace orc {
+
+  /**
+   * CpuInfo is an interface to query for cpu information at runtime.  The caller can
+   * ask for the sizes of the caches and what hardware features are supported.
+   * On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
+   * /sys/devices). Instances cannot be created directly (the constructor is
+   * private); use getInstance().
+   */
+  class CpuInfo {
+   public:
+    ~CpuInfo();
+
+    // x86 features (one bit each; AVX512 is the union of the five AVX512* bits)
+    static constexpr int64_t SSSE3 = (1LL << 0);
+    static constexpr int64_t SSE4_1 = (1LL << 1);
+    static constexpr int64_t SSE4_2 = (1LL << 2);
+    static constexpr int64_t POPCNT = (1LL << 3);
+    static constexpr int64_t AVX = (1LL << 4);
+    static constexpr int64_t AVX2 = (1LL << 5);
+    static constexpr int64_t AVX512F = (1LL << 6);
+    static constexpr int64_t AVX512CD = (1LL << 7);
+    static constexpr int64_t AVX512VL = (1LL << 8);
+    static constexpr int64_t AVX512DQ = (1LL << 9);
+    static constexpr int64_t AVX512BW = (1LL << 10);
+    static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ | AVX512BW;
+    static constexpr int64_t BMI1 = (1LL << 11);
+    static constexpr int64_t BMI2 = (1LL << 12);
+
+    // Arm features
+    static constexpr int64_t ASIMD = (1LL << 32);
+
+    // Cache enums for L1 (data), L2 and L3. Last aliases the highest level and is
+    // used to size per-level tables.
+    enum class CacheLevel { L1 = 0, L2, L3, Last = L3 };
+
+    // CPU vendors
+    enum class Vendor { Unknown, Intel, AMD };
+
+    // Returns the shared instance; the CPU is probed when it is first constructed.
+    static const CpuInfo* getInstance();
+
+    // Returns the feature flags of this cpu that are still enabled after applying
+    // the ORC_USER_SIMD_LEVEL override.
+    int64_t hardwareFlags() const;
+
+    // Returns the number of cores (including hyper-threaded) on this machine, as
+    // reported by std::thread::hardware_concurrency(); never less than 1.
+    int numCores() const;
+
+    // Returns the vendor of the cpu.
+    Vendor vendor() const;
+
+    // Returns the model name of the cpu (e.g. Intel i7-2600)
+    const std::string& modelName() const;
+
+    // Returns the size of the cache in bytes at this cache level (the built-in
+    // fallback for L1, for instance, is 32 * 1024).
+    int64_t cacheSize(CacheLevel level) const;
+
+    /**
+     * Returns whether or not the given feature is enabled.
+     * isSupported() is true if isDetected() is also true and the feature
+     * wasn't disabled by the user (for example by setting the ORC_USER_SIMD_LEVEL
+     * environment variable).
+     */
+    bool isSupported(int64_t flags) const;
+
+    // Returns whether or not the given feature is available on the CPU.
+    bool isDetected(int64_t flags) const;
+
+    // Determine if the CPU meets the minimum CPU requirements and if not, throw
+    // ParseError.
+    void verifyCpuRequirements() const;
+
+    bool hasEfficientBmi2() const {
+      // BMI2 (pext, pdep) is only efficient on Intel X86 processors.
+      return vendor() == Vendor::Intel && isSupported(BMI2);
+    }
+
+   private:
+    CpuInfo();
+
+    // Implementation details are kept out of the header.
+    struct Impl;
+    std::unique_ptr<Impl> impl_;
+  };
+
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/Dispatch.hh b/contrib/libs/apache/orc/c++/src/Dispatch.hh
new file mode 100644
index 0000000000..489317b28a
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/Dispatch.hh
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_DISPATCH_HH
+#define ORC_DISPATCH_HH
+
+#include <utility>
+#include <vector>
+
+#include "CpuInfoUtil.hh"
+
+namespace orc {
+  // Instruction-set tiers an implementation can be registered for with
+  // DynamicDispatch.
+  enum class DispatchLevel : int {
+    // These dispatch levels, corresponding to instruction set features,
+    // are sorted in increasing order of preference.
+    NONE = 0,  // portable implementation, always usable
+    AVX512,    // requires CpuInfo::AVX512 to be supported
+    MAX        // upper bound of the range; treated like AVX512 when checking support
+  };
+
+  /**
+   * Picks, once at construction, the most preferred implementation of a function
+   * that the running CPU supports and exposes it through `func`.
+   *
+   * DynamicFunction must provide a FunctionType alias and a static
+   * implementations() returning the (DispatchLevel, function) candidates.
+   *
+   * Typical use:
+   *
+   *   static void my_function_default(...);
+   *   static void my_function_avx512(...);
+   *
+   *   struct MyDynamicFunction {
+   *     using FunctionType = decltype(&my_function_default);
+   *
+   *     static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+   *       return {
+   *         { DispatchLevel::NONE, my_function_default }
+   *   #if defined(ORC_HAVE_RUNTIME_AVX512)
+   *         , { DispatchLevel::AVX512, my_function_avx512 }
+   *   #endif
+   *       };
+   *     }
+   *   };
+   *
+   *   void my_function(...) {
+   *     static DynamicDispatch<MyDynamicFunction> dispatch;
+   *     return dispatch.func(...);
+   *   }
+   */
+  template <typename DynamicFunction>
+  class DynamicDispatch {
+   protected:
+    using FunctionType = typename DynamicFunction::FunctionType;
+    using Implementation = std::pair<DispatchLevel, FunctionType>;
+
+   public:
+    // Throws InvalidArgument when no registered implementation is usable.
+    DynamicDispatch() {
+      Resolve(DynamicFunction::implementations());
+    }
+
+    // The selected implementation.
+    FunctionType func = {};
+
+   protected:
+    // Selects the supported candidate with the highest DispatchLevel; among
+    // candidates of equal level the one listed last wins.
+    void Resolve(const std::vector<Implementation>& implementations) {
+      DispatchLevel chosenLevel = DispatchLevel::NONE;
+      FunctionType chosenFunc = {};
+
+      for (const Implementation& candidate : implementations) {
+        // Skip anything below the current pick or not usable on this CPU.
+        if (candidate.first < chosenLevel || !levelSupported(candidate.first)) {
+          continue;
+        }
+        chosenLevel = candidate.first;
+        chosenFunc = candidate.second;
+      }
+
+      if (!chosenFunc) {
+        throw InvalidArgument("No appropriate implementation found");
+      }
+      func = chosenFunc;
+    }
+
+   private:
+    // True when the CPU (after user overrides) can run code of the given level.
+    bool levelSupported(DispatchLevel level) const {
+      static const auto cpu_info = CpuInfo::getInstance();
+
+      if (level == DispatchLevel::NONE) {
+        return true;
+      }
+      if (level == DispatchLevel::AVX512 || level == DispatchLevel::MAX) {
+        return cpu_info->isSupported(CpuInfo::AVX512);
+      }
+      return false;
+    }
+  };
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/Exceptions.cc b/contrib/libs/apache/orc/c++/src/Exceptions.cc
index 2077b27df4..23703ff324 100644
--- a/contrib/libs/apache/orc/c++/src/Exceptions.cc
+++ b/contrib/libs/apache/orc/c++/src/Exceptions.cc
@@ -20,59 +20,68 @@
 
 namespace orc {
 
-  NotImplementedYet::NotImplementedYet(const std::string& what_arg
-                                       ) : logic_error(what_arg) {
+  NotImplementedYet::NotImplementedYet(const std::string& what_arg) : logic_error(what_arg) {
     // PASS
   }
 
-  NotImplementedYet::NotImplementedYet(const char* what_arg
-                                       ) :logic_error(what_arg) {
+  NotImplementedYet::NotImplementedYet(const char* what_arg) : logic_error(what_arg) {
     // PASS
   }
 
-  NotImplementedYet::NotImplementedYet(const NotImplementedYet& error
-                                       ): logic_error(error) {
+  NotImplementedYet::NotImplementedYet(const NotImplementedYet& error) : logic_error(error) {
     // PASS
   }
 
-  NotImplementedYet::~NotImplementedYet() ORC_NOEXCEPT {
+  NotImplementedYet::~NotImplementedYet() noexcept {
     // PASS
   }
 
-  ParseError::ParseError(const std::string& what_arg
-                         ): runtime_error(what_arg) {
+  ParseError::ParseError(const std::string& what_arg) : runtime_error(what_arg) {
     // PASS
   }
 
-  ParseError::ParseError(const char* what_arg
-                         ): runtime_error(what_arg) {
+  ParseError::ParseError(const char* what_arg) : runtime_error(what_arg) {
     // PASS
   }
 
-  ParseError::ParseError(const ParseError& error): runtime_error(error) {
+  ParseError::ParseError(const ParseError& error) : runtime_error(error) {
     // PASS
   }
 
-  ParseError::~ParseError() ORC_NOEXCEPT {
+  ParseError::~ParseError() noexcept {
     // PASS
   }
 
-  InvalidArgument::InvalidArgument(const std::string& what_arg
-                                   ): runtime_error(what_arg) {
+  InvalidArgument::InvalidArgument(const std::string& what_arg) : runtime_error(what_arg) {
     // PASS
   }
 
-  InvalidArgument::InvalidArgument(const char* what_arg
-                                   ): runtime_error(what_arg) {
+  InvalidArgument::InvalidArgument(const char* what_arg) : runtime_error(what_arg) {
     // PASS
   }
 
-  InvalidArgument::InvalidArgument(const InvalidArgument& error
-                                   ): runtime_error(error) {
+  InvalidArgument::InvalidArgument(const InvalidArgument& error) : runtime_error(error) {
     // PASS
   }
 
-  InvalidArgument::~InvalidArgument() ORC_NOEXCEPT {
+  InvalidArgument::~InvalidArgument() noexcept {
     // PASS
   }
-}
+
+  // SchemaEvolutionError adds no state of its own: each constructor forwards its
+  // argument to the logic_error base and the bodies are intentionally empty.
+  SchemaEvolutionError::SchemaEvolutionError(const std::string& what_arg) : logic_error(what_arg) {
+    // PASS
+  }
+
+  SchemaEvolutionError::SchemaEvolutionError(const char* what_arg) : logic_error(what_arg) {
+    // PASS
+  }
+
+  SchemaEvolutionError::SchemaEvolutionError(const SchemaEvolutionError& error)
+      : logic_error(error) {
+    // PASS
+  }
+
+  SchemaEvolutionError::~SchemaEvolutionError() noexcept {
+    // PASS
+  }
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Int128.cc b/contrib/libs/apache/orc/c++/src/Int128.cc
index 4ff500fbac..3c159f3775 100644
--- a/contrib/libs/apache/orc/c++/src/Int128.cc
+++ b/contrib/libs/apache/orc/c++/src/Int128.cc
@@ -45,7 +45,7 @@ namespace orc {
         size_t group = std::min(static_cast<size_t>(18), length - posn);
         int64_t chunk = std::stoll(str.substr(posn, group));
         int64_t multiple = 1;
-        for(size_t i=0; i < group; ++i) {
+        for (size_t i = 0; i < group; ++i) {
           multiple *= 10;
         }
         *this *= multiple;
@@ -58,7 +58,7 @@ namespace orc {
     }
   }
 
-  Int128& Int128::operator*=(const Int128 &right) {
+  Int128& Int128::operator*=(const Int128& right) {
     const uint64_t INT_MASK = 0xffffffff;
     const uint64_t CARRY_BIT = INT_MASK + 1;
 
@@ -100,7 +100,7 @@ namespace orc {
    * @param wasNegative a flag for whether the value was original negative
    * @result the output length of the array
    */
-  int64_t Int128::fillInArray(uint32_t* array, bool &wasNegative) const {
+  int64_t Int128::fillInArray(uint32_t* array, bool& wasNegative) const {
     uint64_t high;
     uint64_t low;
     if (highbits < 0) {
@@ -140,7 +140,6 @@ namespace orc {
     }
   }
 
-
   /**
    * Find last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is
    * the MSB. We can replace this with bsrq asm instruction on x64.
@@ -162,10 +161,10 @@ namespace orc {
    */
   void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
     if (length > 0 && bits != 0) {
-      for(int64_t i=0; i < length-1; ++i) {
-        array[i] = (array[i] << bits) | (array[i+1] >> (32 - bits));
+      for (int64_t i = 0; i < length - 1; ++i) {
+        array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
       }
-      array[length-1] <<= bits;
+      array[length - 1] <<= bits;
     }
   }
 
@@ -177,8 +176,8 @@ namespace orc {
    */
   void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
     if (length > 0 && bits != 0) {
-      for(int64_t i=length-1; i > 0; --i) {
-        array[i] = (array[i] >> bits) | (array[i-1] << (32 - bits));
+      for (int64_t i = length - 1; i > 0; --i) {
+        array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
       }
       array[0] >>= bits;
     }
@@ -188,8 +187,8 @@ namespace orc {
    * Fix the signs of the result and remainder at the end of the division
    * based on the signs of the dividend and divisor.
    */
-  void fixDivisionSigns(Int128 &result, Int128 &remainder,
-                        bool dividendWasNegative, bool divisorWasNegative) {
+  void fixDivisionSigns(Int128& result, Int128& remainder, bool dividendWasNegative,
+                        bool divisorWasNegative) {
     if (dividendWasNegative != divisorWasNegative) {
       result.negate();
     }
@@ -203,44 +202,42 @@ namespace orc {
    */
   void buildFromArray(Int128& value, uint32_t* array, int64_t length) {
     switch (length) {
-    case 0:
-      value = 0;
-      break;
-    case 1:
-      value = array[0];
-      break;
-    case 2:
-      value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]);
-      break;
-    case 3:
-      value = Int128(array[0],
-                     (static_cast<uint64_t>(array[1]) << 32) + array[2]);
-      break;
-    case 4:
-      value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1],
-                     (static_cast<uint64_t>(array[2]) << 32) + array[3]);
-      break;
-    case 5:
-      if (array[0] != 0) {
-        throw std::logic_error("Can't build Int128 with 5 ints.");
-      }
-      value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2],
-                     (static_cast<uint64_t>(array[3]) << 32) + array[4]);
-      break;
-    default:
-      throw std::logic_error("Unsupported length for building Int128");
+      case 0:
+        value = 0;
+        break;
+      case 1:
+        value = array[0];
+        break;
+      case 2:
+        value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]);
+        break;
+      case 3:
+        value = Int128(array[0], (static_cast<uint64_t>(array[1]) << 32) + array[2]);
+        break;
+      case 4:
+        value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1],
+                       (static_cast<uint64_t>(array[2]) << 32) + array[3]);
+        break;
+      case 5:
+        if (array[0] != 0) {
+          throw std::logic_error("Can't build Int128 with 5 ints.");
+        }
+        value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2],
+                       (static_cast<uint64_t>(array[3]) << 32) + array[4]);
+        break;
+      default:
+        throw std::logic_error("Unsupported length for building Int128");
     }
   }
 
   /**
    * Do a division where the divisor fits into a single 32 bit value.
    */
-  Int128 singleDivide(uint32_t* dividend, int64_t dividendLength,
-                      uint32_t divisor, Int128& remainder,
-                      bool dividendWasNegative, bool divisorWasNegative) {
+  Int128 singleDivide(uint32_t* dividend, int64_t dividendLength, uint32_t divisor,
+                      Int128& remainder, bool dividendWasNegative, bool divisorWasNegative) {
     uint64_t r = 0;
     uint32_t resultArray[5];
-    for(int64_t j=0; j < dividendLength; j++) {
+    for (int64_t j = 0; j < dividendLength; j++) {
       r <<= 32;
       r += dividend[j];
       resultArray[j] = static_cast<uint32_t>(r / divisor);
@@ -249,12 +246,11 @@ namespace orc {
     Int128 result;
     buildFromArray(result, resultArray, dividendLength);
     remainder = static_cast<int64_t>(r);
-    fixDivisionSigns(result, remainder, dividendWasNegative,
-                     divisorWasNegative);
+    fixDivisionSigns(result, remainder, dividendWasNegative, divisorWasNegative);
     return result;
   }
 
-  Int128 Int128::divide(const Int128 &divisor, Int128 &remainder) const {
+  Int128 Int128::divide(const Int128& divisor, Int128& remainder) const {
     // Split the dividend and divisor into integer pieces so that we can
     // work on them.
     uint32_t dividendArray[5];
@@ -263,7 +259,7 @@ namespace orc {
     bool divisorWasNegative;
     // leave an extra zero before the dividend
     dividendArray[0] = 0;
-    int64_t dividendLength = fillInArray(dividendArray + 1, dividendWasNegative)+1;
+    int64_t dividendLength = fillInArray(dividendArray + 1, dividendWasNegative) + 1;
     int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative);
 
     // Handle some of the easy cases.
@@ -273,8 +269,8 @@ namespace orc {
     } else if (divisorLength == 0) {
       throw std::range_error("Division by 0 in Int128");
     } else if (divisorLength == 1) {
-      return singleDivide(dividendArray, dividendLength, divisorArray[0],
-                          remainder, dividendWasNegative, divisorWasNegative);
+      return singleDivide(dividendArray, dividendLength, divisorArray[0], remainder,
+                          dividendWasNegative, divisorWasNegative);
     }
 
     int64_t resultLength = dividendLength - divisorLength;
@@ -288,11 +284,10 @@ namespace orc {
     shiftArrayLeft(dividendArray, dividendLength, normalizeBits);
 
     // compute each digit in the result
-    for(int64_t j=0; j < resultLength; ++j) {
+    for (int64_t j = 0; j < resultLength; ++j) {
       // Guess the next digit. At worst it is two too large
       uint32_t guess = UINT32_MAX;
-      uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << 32 |
-        dividendArray[j+1];
+      uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << 32 | dividendArray[j + 1];
       if (dividendArray[j] != divisorArray[0]) {
         guess = static_cast<uint32_t>(highDividend / divisorArray[0]);
       }
@@ -300,10 +295,9 @@ namespace orc {
       // catch all of the cases where guess is two too large and most of the
       // cases where it is one too large
       uint32_t rhat =
-        static_cast<uint32_t>(highDividend - guess *
-                              static_cast<uint64_t>(divisorArray[0]));
+          static_cast<uint32_t>(highDividend - guess * static_cast<uint64_t>(divisorArray[0]));
       while (static_cast<uint64_t>(divisorArray[1]) * guess >
-             (static_cast<uint64_t>(rhat) << 32) + dividendArray[j+2]) {
+             (static_cast<uint64_t>(rhat) << 32) + dividendArray[j + 2]) {
         guess -= 1;
         rhat += divisorArray[0];
         if (static_cast<uint64_t>(rhat) < divisorArray[0]) {
@@ -313,12 +307,12 @@ namespace orc {
 
       // subtract off the guess * divisor from the dividend
       uint64_t mult = 0;
-      for(int64_t i=divisorLength-1; i >= 0; --i) {
+      for (int64_t i = divisorLength - 1; i >= 0; --i) {
         mult += static_cast<uint64_t>(guess) * divisorArray[i];
-        uint32_t prev = dividendArray[j+i+1];
-        dividendArray[j+i+1] -= static_cast<uint32_t>(mult);
+        uint32_t prev = dividendArray[j + i + 1];
+        dividendArray[j + i + 1] -= static_cast<uint32_t>(mult);
         mult >>= 32;
-        if (dividendArray[j+i+1] > prev) {
+        if (dividendArray[j + i + 1] > prev) {
           mult += 1;
         }
       }
@@ -329,10 +323,9 @@ namespace orc {
       if (dividendArray[j] > prev) {
         guess -= 1;
         uint32_t carry = 0;
-        for(int64_t i=divisorLength-1; i >= 0; --i) {
-          uint64_t sum = static_cast<uint64_t>(divisorArray[i]) +
-            dividendArray[j+i+1] + carry;
-          dividendArray[j+i+1] = static_cast<uint32_t>(sum);
+        for (int64_t i = divisorLength - 1; i >= 0; --i) {
+          uint64_t sum = static_cast<uint64_t>(divisorArray[i]) + dividendArray[j + i + 1] + carry;
+          dividendArray[j + i + 1] = static_cast<uint32_t>(sum);
           carry = static_cast<uint32_t>(sum >> 32);
         }
         dividendArray[j] += carry;
@@ -348,8 +341,7 @@ namespace orc {
     Int128 result;
     buildFromArray(result, resultArray, resultLength);
     buildFromArray(remainder, dividendArray, dividendLength);
-    fixDivisionSigns(result, remainder,
-                     dividendWasNegative, divisorWasNegative);
+    fixDivisionSigns(result, remainder, dividendWasNegative, divisorWasNegative);
     return result;
   }
 
@@ -400,8 +392,7 @@ namespace orc {
       int32_t len = static_cast<int32_t>(str.length());
       if (len - 1 > scale) {
         result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
-                 str.substr(static_cast<size_t>(len - scale),
-                            static_cast<size_t>(len));
+                 str.substr(static_cast<size_t>(len - scale), static_cast<size_t>(len));
       } else if (len - 1 == scale) {
         result = "-0." + str.substr(1, std::string::npos);
       } else {
@@ -415,8 +406,7 @@ namespace orc {
       int32_t len = static_cast<int32_t>(str.length());
       if (len > scale) {
         result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
-                 str.substr(static_cast<size_t>(len - scale),
-                            static_cast<size_t>(len));
+                 str.substr(static_cast<size_t>(len - scale), static_cast<size_t>(len));
       } else if (len == scale) {
         result = "0." + str;
       } else {
@@ -440,37 +430,41 @@ namespace orc {
 
   std::string Int128::toHexString() const {
     std::stringstream buf;
-    buf << std::hex << "0x"
-        << std::setw(16) << std::setfill('0') << highbits
-        << std::setw(16) << std::setfill('0') << lowbits;
+    buf << std::hex << "0x" << std::setw(16) << std::setfill('0') << highbits << std::setw(16)
+        << std::setfill('0') << lowbits;
     return buf.str();
   }
 
+  double Int128::toDouble() const {
+    // When fitsInLong() holds, convert via toLong(); otherwise add lowbits to highbits * 2^64.
+    if (fitsInLong()) return static_cast<double>(toLong());
+    const double scaledHigh = std::ldexp(static_cast<double>(highbits), 64);
+    return static_cast<double>(lowbits) + scaledHigh;
+  }
+
   const static int32_t MAX_PRECISION_64 = 18;
-  const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] =
-    {1,
-     10,
-     100,
-     1000,
-     10000,
-     100000,
-     1000000,
-     10000000,
-     100000000,
-     1000000000,
-     10000000000,
-     100000000000,
-     1000000000000,
-     10000000000000,
-     100000000000000,
-     1000000000000000,
-     10000000000000000,
-     100000000000000000,
-     1000000000000000000};
-
-  Int128 scaleUpInt128ByPowerOfTen(Int128 value,
-                                   int32_t power,
-                                   bool &overflow) {
+  const static int32_t MAX_PRECISION_128 = 38;
+  const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] = {1,
+                                                              10,
+                                                              100,
+                                                              1000,
+                                                              10000,
+                                                              100000,
+                                                              1000000,
+                                                              10000000,
+                                                              100000000,
+                                                              1000000000,
+                                                              10000000000,
+                                                              100000000000,
+                                                              1000000000000,
+                                                              10000000000000,
+                                                              100000000000000,
+                                                              1000000000000000,
+                                                              10000000000000000,
+                                                              100000000000000000,
+                                                              1000000000000000000};
+
+  Int128 scaleUpInt128ByPowerOfTen(Int128 value, int32_t power, bool& overflow) {
     overflow = false;
     Int128 remainder;
 
@@ -479,7 +473,8 @@ namespace orc {
       if (value > 0 && Int128::maximumValue().divide(POWERS_OF_TEN[step], remainder) < value) {
         overflow = true;
         return Int128::maximumValue();
-      } else if (value < 0 && Int128::minimumValue().divide(POWERS_OF_TEN[step], remainder) > value) {
+      } else if (value < 0 &&
+                 Int128::minimumValue().divide(POWERS_OF_TEN[step], remainder) > value) {
         overflow = true;
         return Int128::minimumValue();
       }
@@ -501,4 +496,100 @@ namespace orc {
     return value;
   }
 
-}
+  // Rescales the decimal `value` from `fromScale` to `toScale` within `toPrecision` digits.
+  // Returns {overflow, result}; throws std::invalid_argument for out-of-range precision/scale.
+  std::pair<bool, Int128> convertDecimal(Int128 value, int32_t fromScale, int32_t toPrecision,
+                                         int32_t toScale, bool round) {
+    if (toPrecision > MAX_PRECISION_128 || toPrecision < 1 || toScale < 0 ||
+        toScale > toPrecision || fromScale < 0 ||
+        std::abs(fromScale - toScale) > MAX_PRECISION_128) {
+      std::stringstream buf;
+      buf << "Invalid argument: fromScale=" << fromScale << ", toPrecision=" << toPrecision
+          << ", toScale=" << toScale;
+      throw std::invalid_argument(buf.str());
+    }
+    std::pair<bool, Int128> result;
+    bool negative = value < 0;
+    result.second = value.abs();
+    result.first = false;
+
+    Int128 upperBound = scaleUpInt128ByPowerOfTen(1, toPrecision, result.first);
+    int8_t roundOffset = 0;
+    int32_t deltaScale = fromScale - toScale;
+    if (deltaScale > 0) {
+      Int128 scale = scaleUpInt128ByPowerOfTen(1, deltaScale, result.first), remainder;
+      result.second = result.second.divide(scale, remainder);
+      remainder *= 2;  // 2 * remainder >= scale: dropped digits are at least half a unit
+      if (round && remainder >= scale) {
+        upperBound -= 1;  // leave room for the +1 applied to the magnitude below
+        roundOffset = 1;
+      }
+    } else if (deltaScale < 0) {
+      if (result.second > upperBound) {
+        result.first = true;
+        return result;
+      }
+      result.second = scaleUpInt128ByPowerOfTen(result.second, -deltaScale, result.first);
+    }
+
+    if (result.second >= upperBound) {  // 10^toPrecision itself needs toPrecision + 1 digits
+      result.first = true;
+      return result;
+    }
+    result.second += roundOffset;
+    if (negative) {
+      result.second *= -1;
+    }
+    return result;
+  }
+
+  // Converts a floating-point `value` to a decimal(precision, scale) held as a scaled Int128.
+  // Returns {overflow, result}; overflow is reported for NaN, invalid precision/scale, or a
+  // magnitude that needs more than `precision` digits once rounded to `scale` fraction digits.
+  template <typename T>
+  std::enable_if_t<std::is_floating_point_v<T>, std::pair<bool, Int128>> convertDecimal(
+      T value, int32_t precision, int32_t scale) {
+    const static T upperbound = std::ldexp(static_cast<T>(1), 127);
+    std::pair<bool, Int128> result = {true, 0};  // stays "overflow" until conversion succeeds
+    if (precision > MAX_PRECISION_128 || precision < 1 || scale > precision || scale < 0 ||
+        std::isnan(value) || value <= -upperbound || value >= upperbound) {
+      return result;
+    }
+    // Truncates a non-negative T below 2^127 to Int128. A plain int64_t cast is undefined once
+    // the operand reaches 2^63, which the scaled fraction can do for scale > 18.
+    auto toInt128 = [](T v) {
+      if (v >= std::ldexp(static_cast<T>(1.0), 64)) {
+        int64_t hi = static_cast<int64_t>(std::ldexp(v, -64));
+        uint64_t lo = static_cast<uint64_t>(v - std::ldexp(static_cast<T>(hi), 64));
+        return Int128(hi, lo);
+      }
+      return Int128(0, static_cast<uint64_t>(v));
+    };
+
+    const bool isNegative = (value < 0);
+    value = std::fabs(value);
+    Int128 i128 = toInt128(value);
+    value = value - std::floor(value);
+    bool overflow = false;
+    i128 = scaleUpInt128ByPowerOfTen(i128, scale, overflow);
+    if (overflow || i128 >= scaleUpInt128ByPowerOfTen(1, precision, overflow)) {
+      return result;
+    }
+    i128 += toInt128(std::round(value * static_cast<T>(pow(10, scale))));
+    // Rounding the fraction up can carry into an extra leading digit, so test the bound again.
+    if (i128 >= scaleUpInt128ByPowerOfTen(1, precision, overflow)) {
+      return result;
+    }
+    if (isNegative) {
+      i128 = i128.negate();
+    }
+    result.first = false;
+    result.second = i128;
+    return result;
+  }
+
+  template std::pair<bool, Int128> convertDecimal(float value, int32_t precision, int32_t scale);
+
+  template std::pair<bool, Int128> convertDecimal(double value, int32_t precision, int32_t scale);
+
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc b/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc
index 21bf194fed..f494f4b651 100644
--- a/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc
+++ b/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc
@@ -1,15 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #include "Adaptor.hh"
@@ -24,8 +29,8 @@ namespace orc {
   static const int32_t DEC_64_TABLE[] = {0, 0, 0, -1, 0, 1, 2, 3};
 
   static const int32_t SIZE_OF_SHORT = 2;
-  static const int32_t SIZE_OF_INT   = 4;
-  static const int32_t SIZE_OF_LONG  = 8;
+  static const int32_t SIZE_OF_INT = 4;
+  static const int32_t SIZE_OF_LONG = 8;
 
   static std::string toHex(uint64_t val) {
     std::ostringstream out;
@@ -39,45 +44,37 @@ namespace orc {
     return out.str();
   }
 
-  class MalformedInputException: public ParseError {
-  public:
-    MalformedInputException(int64_t off
-                            ) :ParseError("MalformedInputException at " +
-                                          toString(off)) {
-    }
+  class MalformedInputException : public ParseError {
+   public:
+    MalformedInputException(int64_t off)
+        : ParseError("MalformedInputException at " + toString(off)) {}
 
-    MalformedInputException(int64_t off, const std::string& msg
-                            ): ParseError("MalformedInputException " + msg +
-                                          " at " + toString(off)) {
-    }
+    MalformedInputException(int64_t off, const std::string& msg)
+        : ParseError("MalformedInputException " + msg + " at " + toString(off)) {}
 
-    MalformedInputException(const MalformedInputException& other
-                            ): ParseError(other.what()) {
-    }
+    MalformedInputException(const MalformedInputException& other) : ParseError(other.what()) {}
 
-    virtual ~MalformedInputException() noexcept;
+    ~MalformedInputException() noexcept override;
   };
 
   MalformedInputException::~MalformedInputException() noexcept {
     // PASS
   }
 
-  uint64_t lzoDecompress(const char *inputAddress,
-                         const char *inputLimit,
-                         char *outputAddress,
-                         char *outputLimit) {
+  uint64_t lzoDecompress(const char* inputAddress, const char* inputLimit, char* outputAddress,
+                         char* outputLimit) {
     // nothing compresses to nothing
     if (inputAddress == inputLimit) {
       return 0;
     }
 
     // maximum offset in buffers to which it's safe to write long-at-a-time
-    char * const fastOutputLimit = outputLimit - SIZE_OF_LONG;
+    char* const fastOutputLimit = outputLimit - SIZE_OF_LONG;
 
     // LZO can concat two blocks together so, decode until the input data is
     // consumed
-    const char *input = inputAddress;
-    char *output = outputAddress;
+    const char* input = inputAddress;
+    char* output = outputAddress;
     while (input < inputLimit) {
       //
       // Note: For safety some of the code below may stop decoding early or
@@ -127,8 +124,7 @@ namespace orc {
               literalLength = 0xf;
 
               uint32_t nextByte = 0;
-              while (input < inputLimit &&
-                     (nextByte = *(input++) & 0xFF) == 0) {
+              while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
                 literalLength += 0xff;
               }
               literalLength += nextByte;
@@ -191,8 +187,7 @@ namespace orc {
             matchLength = 0x7;
 
             int32_t nextByte = 0;
-            while (input < inputLimit &&
-                   (nextByte = *(input++) & 0xFF) == 0) {
+            while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
               matchLength += 0xff;
             }
             matchLength += nextByte;
@@ -231,8 +226,7 @@ namespace orc {
             matchLength = 0x1f;
 
             int nextByte = 0;
-            while (input < inputLimit &&
-                   (nextByte = *(input++) & 0xFF) == 0) {
+            while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
               matchLength += 0xff;
             }
             matchLength += nextByte;
@@ -276,8 +270,7 @@ namespace orc {
           literalLength = (command & 0x3);
         } else {
           throw MalformedInputException(input - inputAddress - 1,
-                                        "Invalid LZO command " +
-                                        toHex(command));
+                                        "Invalid LZO command " + toHex(command));
         }
         firstCommand = false;
 
@@ -286,12 +279,11 @@ namespace orc {
           // lzo encodes match offset minus one
           matchOffset++;
 
-          char *matchAddress = output - matchOffset;
-          if (matchAddress < outputAddress ||
-              output + matchLength > outputLimit) {
+          char* matchAddress = output - matchOffset;
+          if (matchAddress < outputAddress || output + matchLength > outputLimit) {
             throw MalformedInputException(input - inputAddress);
           }
-          char *matchOutputLimit = output + matchLength;
+          char* matchOutputLimit = output + matchLength;
 
           if (output > fastOutputLimit) {
             // slow match copy
@@ -343,11 +335,11 @@ namespace orc {
               }
             }
           }
-          output = matchOutputLimit; // correction in case we over-copied
+          output = matchOutputLimit;  // correction in case we over-copied
         }
 
         // copy literal
-        char *literalOutputLimit = output + literalLength;
+        char* literalOutputLimit = output + literalLength;
         if (literalOutputLimit > fastOutputLimit ||
             input + literalLength > inputLimit - SIZE_OF_LONG) {
           if (literalOutputLimit > outputLimit) {
@@ -373,8 +365,7 @@ namespace orc {
         lastLiteralLength = literalLength;
       }
 
-      if (input + SIZE_OF_SHORT > inputLimit &&
-          *reinterpret_cast<const int16_t*>(input) != 0) {
+      if (input + SIZE_OF_SHORT > inputLimit && *reinterpret_cast<const int16_t*>(input) != 0) {
         throw MalformedInputException(input - inputAddress);
       }
       input += SIZE_OF_SHORT;
@@ -383,4 +374,4 @@ namespace orc {
     return static_cast<uint64_t>(output - outputAddress);
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh b/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh
index 9de8537dd8..a37ce8e582 100644
--- a/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh
+++ b/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh
@@ -33,10 +33,8 @@ namespace orc {
    * @param outputLimit one past the last byte of the output buffer
    * @result the number of bytes decompressed
    */
-  uint64_t lzoDecompress(const char *inputAddress,
-                         const char *inputLimit,
-                         char *outputAddress,
-                         char *outputLimit);
-}
+  uint64_t lzoDecompress(const char* inputAddress, const char* inputLimit, char* outputAddress,
+                         char* outputLimit);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/MemoryPool.cc b/contrib/libs/apache/orc/c++/src/MemoryPool.cc
index ecfb295bae..8c8837aa64 100644
--- a/contrib/libs/apache/orc/c++/src/MemoryPool.cc
+++ b/contrib/libs/apache/orc/c++/src/MemoryPool.cc
@@ -16,14 +16,14 @@
  * limitations under the License.
  */
 
-#include "orc/Int128.hh"
 #include "orc/MemoryPool.hh"
+#include "orc/Int128.hh"
 
 #include "Adaptor.hh"
 
+#include <string.h>
 #include <cstdlib>
 #include <iostream>
-#include <string.h>
 
 namespace orc {
 
@@ -31,8 +31,8 @@ namespace orc {
     // PASS
   }
 
-  class MemoryPoolImpl: public MemoryPool {
-  public:
+  class MemoryPoolImpl : public MemoryPool {
+   public:
     virtual ~MemoryPoolImpl() override;
 
     char* malloc(uint64_t size) override;
@@ -52,30 +52,26 @@ namespace orc {
   }
 
   template <class T>
-  DataBuffer<T>::DataBuffer(MemoryPool& pool,
-                            uint64_t newSize
-                            ): memoryPool(pool),
-                               buf(nullptr),
-                               currentSize(0),
-                               currentCapacity(0) {
-    resize(newSize);
+  DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize)
+      : memoryPool(pool), buf(nullptr), currentSize(0), currentCapacity(0) {
+    reserve(newSize);
+    currentSize = newSize;
   }
 
   template <class T>
-  DataBuffer<T>::DataBuffer(DataBuffer<T>&& buffer
-                      ) noexcept:
-                      memoryPool(buffer.memoryPool),
-                      buf(buffer.buf),
-                      currentSize(buffer.currentSize),
-                      currentCapacity(buffer.currentCapacity)  {
+  DataBuffer<T>::DataBuffer(DataBuffer<T>&& buffer) noexcept
+      : memoryPool(buffer.memoryPool),
+        buf(buffer.buf),
+        currentSize(buffer.currentSize),
+        currentCapacity(buffer.currentCapacity) {
     buffer.buf = nullptr;
     buffer.currentSize = 0;
     buffer.currentCapacity = 0;
   }
 
   template <class T>
-  DataBuffer<T>::~DataBuffer(){
-    for(uint64_t i=currentSize; i > 0; --i) {
+  DataBuffer<T>::~DataBuffer() {
+    for (uint64_t i = currentSize; i > 0; --i) {
       (buf + i - 1)->~T();
     }
     if (buf) {
@@ -87,11 +83,11 @@ namespace orc {
   void DataBuffer<T>::resize(uint64_t newSize) {
     reserve(newSize);
     if (currentSize > newSize) {
-      for(uint64_t i=currentSize; i > newSize; --i) {
+      for (uint64_t i = currentSize; i > newSize; --i) {
         (buf + i - 1)->~T();
       }
     } else if (newSize > currentSize) {
-      for(uint64_t i=currentSize; i < newSize; ++i) {
+      for (uint64_t i = currentSize; i < newSize; ++i) {
         new (buf + i) T();
       }
     }
@@ -99,7 +95,7 @@ namespace orc {
   }
 
   template <class T>
-  void DataBuffer<T>::reserve(uint64_t newCapacity){
+  void DataBuffer<T>::reserve(uint64_t newCapacity) {
     if (newCapacity > currentCapacity || !buf) {
       if (buf) {
         T* buf_old = buf;
@@ -113,10 +109,23 @@ namespace orc {
     }
   }
 
+  template <class T>
+  void DataBuffer<T>::zeroOut() {
+    memset(buf, 0, sizeof(T) * currentCapacity);  // whole capacity, not just currentSize
+  }
+
+  // Specialization for Int128: default-construct every slot in place rather than memset
+  template <>
+  void DataBuffer<Int128>::zeroOut() {
+    for (uint64_t i = 0; i < currentCapacity; ++i) {
+      new (buf + i) Int128();
+    }
+  }
+
   // Specializations for char
 
   template <>
-  DataBuffer<char>::~DataBuffer(){
+  DataBuffer<char>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -134,7 +143,7 @@ namespace orc {
   // Specializations for char*
 
   template <>
-  DataBuffer<char*>::~DataBuffer(){
+  DataBuffer<char*>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -152,7 +161,7 @@ namespace orc {
   // Specializations for double
 
   template <>
-  DataBuffer<double>::~DataBuffer(){
+  DataBuffer<double>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -167,10 +176,28 @@ namespace orc {
     currentSize = newSize;
   }
 
+  // Specializations for float: the destructor only hands a non-null buf back to memoryPool,
+  // and resize() zero-fills the elements between the old and new size after reserve().
+  template <>
+  DataBuffer<float>::~DataBuffer() {
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<float>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(float));
+    }
+    currentSize = newSize;
+  }
+
   // Specializations for int64_t
 
   template <>
-  DataBuffer<int64_t>::~DataBuffer(){
+  DataBuffer<int64_t>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -185,10 +212,64 @@ namespace orc {
     currentSize = newSize;
   }
 
+  // Specializations for int32_t: the destructor only hands a non-null buf back to memoryPool,
+  // and resize() zero-fills the elements between the old and new size after reserve().
+  template <>
+  DataBuffer<int32_t>::~DataBuffer() {
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<int32_t>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int32_t));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for int16_t: the destructor only hands a non-null buf back to memoryPool,
+  // and resize() zero-fills the elements between the old and new size after reserve().
+  template <>
+  DataBuffer<int16_t>::~DataBuffer() {
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<int16_t>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int16_t));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for int8_t: the destructor only hands a non-null buf back to memoryPool,
+  // and resize() zero-fills the elements between the old and new size after reserve().
+  template <>
+  DataBuffer<int8_t>::~DataBuffer() {
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<int8_t>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int8_t));
+    }
+    currentSize = newSize;
+  }
+
   // Specializations for uint64_t
 
   template <>
-  DataBuffer<uint64_t>::~DataBuffer(){
+  DataBuffer<uint64_t>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -206,7 +287,7 @@ namespace orc {
   // Specializations for unsigned char
 
   template <>
-  DataBuffer<unsigned char>::~DataBuffer(){
+  DataBuffer<unsigned char>::~DataBuffer() {
     if (buf) {
       memoryPool.free(reinterpret_cast<char*>(buf));
     }
@@ -221,24 +302,28 @@ namespace orc {
     currentSize = newSize;
   }
 
-  #ifdef __clang__
-    #pragma clang diagnostic ignored "-Wweak-template-vtables"
-  #endif
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wweak-template-vtables"
+#endif
 
   template class DataBuffer<char>;  // explicit instantiations: one per element type listed here
   template class DataBuffer<char*>;
   template class DataBuffer<double>;
+  template class DataBuffer<float>;  // new in this change, matches the float specialization above
   template class DataBuffer<Int128>;
   template class DataBuffer<int64_t>;
+  template class DataBuffer<int32_t>;  // new in this change, matches the int32_t specialization above
+  template class DataBuffer<int16_t>;  // new in this change, matches the int16_t specialization above
+  template class DataBuffer<int8_t>;  // new in this change, matches the int8_t specialization above
   template class DataBuffer<uint64_t>;
   template class DataBuffer<unsigned char>;
 
-  #ifdef __clang__
-    #pragma clang diagnostic ignored "-Wexit-time-destructors"
-  #endif
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wexit-time-destructors"
+#endif
 
   MemoryPool* getDefaultPool() {  // returns the default pool (e.g. the initial memoryPool of ReaderOptionsPrivate)
     static MemoryPoolImpl internal;  // function-local static: a single instance, constructed on first call
     return &internal;  // same address on every call; the object is static, so callers must not free it
   }
-} // namespace orc
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Murmur3.cc b/contrib/libs/apache/orc/c++/src/Murmur3.cc
index b45bd6d492..518e5e6de5 100644
--- a/contrib/libs/apache/orc/c++/src/Murmur3.cc
+++ b/contrib/libs/apache/orc/c++/src/Murmur3.cc
@@ -16,14 +16,14 @@
  * limitations under the License.
  */
 
-#include "Adaptor.hh"
 #include "Murmur3.hh"
+#include "Adaptor.hh"
 
 #define ROTL64(x, r) ((x << r) | (x >> (64 - r)))
 
 namespace orc {
 
-  inline uint64_t rotl64 ( uint64_t x, int8_t r ) {
+  inline uint64_t rotl64(uint64_t x, int8_t r) {  // rotate the 64-bit value x left by r bits
     return (x << r) | (x >> (64 - r));  // NOTE(review): r == 0 would shift right by 64 (undefined); assumes callers pass 1..63 — confirm
   }
 
@@ -36,17 +36,17 @@ namespace orc {
     return value;
   }
 
-  uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len) {
+  uint64_t Murmur3::hash64(const uint8_t* data, uint32_t len) {  // convenience overload: hash len bytes at data
     return hash64(data, len, DEFAULT_SEED);  // forwards to the seeded overload with DEFAULT_SEED
   }
 
   DIAGNOSTIC_PUSH
 
 #if defined(__clang__)
-    DIAGNOSTIC_IGNORE("-Wimplicit-fallthrough")
+  DIAGNOSTIC_IGNORE("-Wimplicit-fallthrough")
 #endif
 
-  uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len, uint32_t seed) {
+  uint64_t Murmur3::hash64(const uint8_t* data, uint32_t len, uint32_t seed) {
     uint64_t h = seed;
     uint32_t blocks = len >> 3;
 
@@ -69,16 +69,22 @@ namespace orc {
     switch (len - idx) {
       case 7:
         k ^= static_cast<uint64_t>(data[idx + 6]) << 48;
+        [[fallthrough]];
       case 6:
         k ^= static_cast<uint64_t>(data[idx + 5]) << 40;
+        [[fallthrough]];
       case 5:
         k ^= static_cast<uint64_t>(data[idx + 4]) << 32;
+        [[fallthrough]];
       case 4:
         k ^= static_cast<uint64_t>(data[idx + 3]) << 24;
+        [[fallthrough]];
       case 3:
         k ^= static_cast<uint64_t>(data[idx + 2]) << 16;
+        [[fallthrough]];
       case 2:
         k ^= static_cast<uint64_t>(data[idx + 1]) << 8;
+        [[fallthrough]];
       case 1:
         k ^= static_cast<uint64_t>(data[idx + 0]);
 
@@ -95,4 +101,4 @@ namespace orc {
 
   DIAGNOSTIC_POP
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Murmur3.hh b/contrib/libs/apache/orc/c++/src/Murmur3.hh
index 02391811b0..e3db8654bf 100644
--- a/contrib/libs/apache/orc/c++/src/Murmur3.hh
+++ b/contrib/libs/apache/orc/c++/src/Murmur3.hh
@@ -24,17 +24,17 @@
 namespace orc {
 
   class Murmur3 {  // static-only helper exposing a 64-bit hash over a byte buffer
-  public:
+   public:
     static const uint32_t DEFAULT_SEED = 104729;  // seed used by the public two-argument hash64()
     static const uint64_t NULL_HASHCODE = 2862933555777941757LL;  // presumably the hash reported for null values — confirm against callers
 
-    static uint64_t hash64(const uint8_t *data, uint32_t len);
+    static uint64_t hash64(const uint8_t* data, uint32_t len);  // hashes len bytes starting at data using DEFAULT_SEED
 
-  private:
+   private:
     static uint64_t fmix64(uint64_t value);  // internal mixing step (body not shown in this view)
     static uint64_t hash64(const uint8_t* data, uint32_t len, uint32_t seed);  // seeded implementation behind the public overload
   };
 
-}
+}  // namespace orc
 
-#endif //ORC_MURMUR3_HH
+#endif  // ORC_MURMUR3_HH
diff --git a/contrib/libs/apache/orc/c++/src/Options.hh b/contrib/libs/apache/orc/c++/src/Options.hh
index d8331b3c0a..51cd8efd64 100644
--- a/contrib/libs/apache/orc/c++/src/Options.hh
+++ b/contrib/libs/apache/orc/c++/src/Options.hh
@@ -34,31 +34,30 @@ namespace orc {
     ColumnSelection_TYPE_IDS = 3,
   };
 
-/**
- * ReaderOptions Implementation
- */
+  /**
+   * ReaderOptions Implementation
+   */
   struct ReaderOptionsPrivate {  // backing state for ReaderOptions (held through privateBits)
     uint64_t tailLocation;
     std::ostream* errorStream;
     MemoryPool* memoryPool;
     std::string serializedTail;
+    ReaderMetrics* metrics;  // raw pointer; this struct has no destructor, so it is never freed here
 
     ReaderOptionsPrivate() {
       tailLocation = std::numeric_limits<uint64_t>::max();  // max value is the default; presumably means "no explicit tail location" — confirm
       errorStream = &std::cerr;  // errors go to std::cerr unless overridden
       memoryPool = getDefaultPool();  // default pool unless overridden
+      metrics = nullptr;  // no metrics object unless setReaderMetrics() supplies one
     }
   };
 
-  ReaderOptions::ReaderOptions():
-    privateBits(std::unique_ptr<ReaderOptionsPrivate>
-                (new ReaderOptionsPrivate())) {
+  ReaderOptions::ReaderOptions() : privateBits(std::make_unique<ReaderOptionsPrivate>()) {
     // PASS: nothing else to do; ReaderOptionsPrivate's constructor sets the defaults
   }
 
-  ReaderOptions::ReaderOptions(const ReaderOptions& rhs):
-    privateBits(std::unique_ptr<ReaderOptionsPrivate>
-                (new ReaderOptionsPrivate(*(rhs.privateBits.get())))) {
+  ReaderOptions::ReaderOptions(const ReaderOptions& rhs)
+      : privateBits(std::make_unique<ReaderOptionsPrivate>(*(rhs.privateBits.get()))) {
     // PASS: rhs's private state was copy-constructed above; its pointer members are copied as pointers
   }
 
@@ -83,10 +82,19 @@ namespace orc {
     return *this;
   }
 
-  MemoryPool* ReaderOptions::getMemoryPool() const{
+  MemoryPool* ReaderOptions::getMemoryPool() const {  // returns the pool pointer currently stored in the options
     return privateBits->memoryPool;
   }
 
+  ReaderOptions& ReaderOptions::setReaderMetrics(ReaderMetrics* metrics) {  // may be nullptr (the default value)
+    privateBits->metrics = metrics;  // stored as a raw pointer; ReaderOptionsPrivate never frees it
+    return *this;  // returns *this so setters can be chained
+  }
+
+  ReaderMetrics* ReaderOptions::getReaderMetrics() const {  // nullptr unless setReaderMetrics() stored a pointer
+    return privateBits->metrics;
+  }
+
   ReaderOptions& ReaderOptions::setTailLocation(uint64_t offset) {
     privateBits->tailLocation = offset;
     return *this;
@@ -96,8 +104,7 @@ namespace orc {
     return privateBits->tailLocation;
   }
 
-  ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value
-                                                      ) {
+  ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value) {
     privateBits->serializedTail = value;  // keeps its own copy of the string
     return *this;  // returns *this so setters can be chained
   }
@@ -115,9 +122,9 @@ namespace orc {
     return privateBits->errorStream;
   }
 
-/**
- * RowReaderOptions Implementation
- */
+  /**
+   * RowReaderOptions Implementation
+   */
 
   struct RowReaderOptionsPrivate {
     ColumnSelection selection;
@@ -131,6 +138,9 @@ namespace orc {
     std::shared_ptr<SearchArgument> sargs;
     std::string readerTimezone;
     RowReaderOptions::IdReadIntentMap idReadIntentMap;
+    bool useTightNumericVector;
+    std::shared_ptr<Type> readType;
+    bool throwOnSchemaEvolutionOverflow;
 
     RowReaderOptionsPrivate() {
       selection = ColumnSelection_NONE;
@@ -140,18 +150,17 @@ namespace orc {
       forcedScaleOnHive11Decimal = 6;
       enableLazyDecoding = false;
       readerTimezone = "GMT";
+      useTightNumericVector = false;
+      throwOnSchemaEvolutionOverflow = false;
     }
   };
 
-  RowReaderOptions::RowReaderOptions():
-    privateBits(std::unique_ptr<RowReaderOptionsPrivate>
-                (new RowReaderOptionsPrivate())) {
+  RowReaderOptions::RowReaderOptions() : privateBits(std::make_unique<RowReaderOptionsPrivate>()) {
     // PASS: nothing else to do; RowReaderOptionsPrivate's constructor sets the defaults
   }
 
-  RowReaderOptions::RowReaderOptions(const RowReaderOptions& rhs):
-    privateBits(std::unique_ptr<RowReaderOptionsPrivate>
-                (new RowReaderOptionsPrivate(*(rhs.privateBits.get())))) {
+  RowReaderOptions::RowReaderOptions(const RowReaderOptions& rhs)
+      : privateBits(std::make_unique<RowReaderOptionsPrivate>(*(rhs.privateBits.get()))) {
     // PASS: rhs's private state was copy-constructed above; shared_ptr members end up shared with rhs
   }
 
@@ -195,8 +204,8 @@ namespace orc {
     return *this;
   }
 
-  RowReaderOptions&
-  RowReaderOptions::includeTypesWithIntents(const IdReadIntentMap& idReadIntentMap) {
+  RowReaderOptions& RowReaderOptions::includeTypesWithIntents(
+      const IdReadIntentMap& idReadIntentMap) {
     privateBits->selection = ColumnSelection_TYPE_IDS;
     privateBits->includedColumnIndexes.clear();
     privateBits->idReadIntentMap.clear();
@@ -242,7 +251,7 @@ namespace orc {
     return privateBits->dataLength;
   }
 
-  RowReaderOptions& RowReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow){
+  RowReaderOptions& RowReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow) {
     privateBits->throwOnHive11DecimalOverflow = shouldThrow;  // only records the flag; it is acted on elsewhere
     return *this;  // returns *this so setters can be chained
   }
@@ -251,8 +260,16 @@ namespace orc {
     return privateBits->throwOnHive11DecimalOverflow;
   }
 
-  RowReaderOptions& RowReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale
-                                                           ) {
+  RowReaderOptions& RowReaderOptions::throwOnSchemaEvolutionOverflow(bool shouldThrow) {
+    privateBits->throwOnSchemaEvolutionOverflow = shouldThrow;  // only records the flag (default false); it is acted on elsewhere
+    return *this;  // returns *this so setters can be chained
+  }
+
+  bool RowReaderOptions::getThrowOnSchemaEvolutionOverflow() const {  // false unless changed through the setter
+    return privateBits->throwOnSchemaEvolutionOverflow;
+  }
+
+  RowReaderOptions& RowReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale) {
     privateBits->forcedScaleOnHive11Decimal = forcedScale;  // overrides the constructor default of 6
     return *this;  // returns *this so setters can be chained
   }
@@ -288,10 +305,27 @@ namespace orc {
     return privateBits->readerTimezone;
   }
 
-  const RowReaderOptions::IdReadIntentMap
-  RowReaderOptions::getIdReadIntentMap() const {
+  const RowReaderOptions::IdReadIntentMap RowReaderOptions::getIdReadIntentMap() const {
     return privateBits->idReadIntentMap;  // returned by value, so each call copies the stored map
   }
-}
+
+  RowReaderOptions& RowReaderOptions::setUseTightNumericVector(bool useTightNumericVector) {
+    privateBits->useTightNumericVector = useTightNumericVector;  // only records the flag (default false); it is acted on elsewhere
+    return *this;  // returns *this so setters can be chained
+  }
+
+  bool RowReaderOptions::getUseTightNumericVector() const {  // false unless changed through the setter
+    return privateBits->useTightNumericVector;
+  }
+
+  RowReaderOptions& RowReaderOptions::setReadType(std::shared_ptr<Type> type) {  // taken by value so it can be moved into storage
+    privateBits->readType = std::move(type);  // replaces any previously stored read type
+    return *this;  // returns *this so setters can be chained
+  }
+
+  std::shared_ptr<Type>& RowReaderOptions::getReadType() const {  // empty shared_ptr unless setReadType() stored one
+    return privateBits->readType;  // NOTE(review): a mutable reference from a const method, so callers can reseat the stored pointer
+  }
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/OrcFile.cc b/contrib/libs/apache/orc/c++/src/OrcFile.cc
index a0158bbadf..d4b6a86e2f 100644
--- a/contrib/libs/apache/orc/c++/src/OrcFile.cc
+++ b/contrib/libs/apache/orc/c++/src/OrcFile.cc
@@ -16,15 +16,16 @@
  * limitations under the License.
  */
 
-#include "Adaptor.hh"
 #include "orc/OrcFile.hh"
+#include "Adaptor.hh"
+#include "Utils.hh"
 #include "orc/Exceptions.hh"
 
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
-#include <sys/stat.h>
 #include <string.h>
+#include <sys/stat.h>
 
 #ifdef _MSC_VER
 #include <io.h>
@@ -32,6 +33,7 @@
 #define S_IWUSR _S_IWRITE
 #define stat _stat64
 #define fstat _fstat64
+#define fsync _commit
 #else
 #include <unistd.h>
 #define O_BINARY 0
@@ -39,15 +41,22 @@
 
 namespace orc {
 
+  DIAGNOSTIC_PUSH
+
+#ifdef __clang__
+  DIAGNOSTIC_IGNORE("-Wunused-private-field")
+#endif
+
   class FileInputStream : public InputStream {
-  private:
+   private:
     std::string filename;
     int file;
     uint64_t totalLength;
+    ReaderMetrics* metrics;
 
-  public:
-    FileInputStream(std::string _filename) {
-      filename = _filename;
+   public:
+    FileInputStream(std::string _filename, ReaderMetrics* _metrics)
+        : filename(_filename), metrics(_metrics) {
       file = open(filename.c_str(), O_BINARY | O_RDONLY);
       if (file == -1) {
         throw ParseError("Can't open " + filename);
@@ -69,9 +78,8 @@ namespace orc {
       return 128 * 1024;
     }
 
-    void read(void* buf,
-              uint64_t length,
-              uint64_t offset) override {
+    void read(void* buf, uint64_t length, uint64_t offset) override {
+      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount);
       if (!buf) {
         throw ParseError("Buffer is null");
       }
@@ -94,42 +102,41 @@ namespace orc {
     close(file);
   }
 
-  std::unique_ptr<InputStream> readFile(const std::string& path) {
+  std::unique_ptr<InputStream> readFile(const std::string& path, ReaderMetrics* metrics) {  // metrics is forwarded unchanged to the chosen backend
 #ifdef BUILD_LIBHDFSPP
-    if(strncmp (path.c_str(), "hdfs://", 7) == 0){
-      return orc::readHdfsFile(std::string(path));
+    if (strncmp(path.c_str(), "hdfs://", 7) == 0) {
+      return orc::readHdfsFile(std::string(path), metrics);
     } else {
 #endif
-      return orc::readLocalFile(std::string(path));
+      return orc::readLocalFile(std::string(path), metrics);  // the only path when BUILD_LIBHDFSPP is not defined
 #ifdef BUILD_LIBHDFSPP
-      }
+    }
 #endif
   }
 
-  std::unique_ptr<InputStream> readLocalFile(const std::string& path) {
-      return std::unique_ptr<InputStream>(new FileInputStream(path));
+  DIAGNOSTIC_POP
+
+  std::unique_ptr<InputStream> readLocalFile(const std::string& path, ReaderMetrics* metrics) {
+    return std::make_unique<FileInputStream>(path, metrics);  // the FileInputStream constructor opens the file and throws ParseError if open() fails
+  }
 
-  OutputStream::~OutputStream() {
+  OutputStream::~OutputStream(){
       // PASS: intentionally empty; the destructor is defined out of line here
   };
 
   class FileOutputStream : public OutputStream {
-  private:
+   private:
     std::string filename;
     int file;
     uint64_t bytesWritten;
     bool closed;
 
-  public:
+   public:
     FileOutputStream(std::string _filename) {
       bytesWritten = 0;
       filename = _filename;
       closed = false;
-      file = open(
-                  filename.c_str(),
-                  O_BINARY | O_CREAT | O_WRONLY | O_TRUNC,
-                  S_IRUSR | S_IWUSR);
+      file = open(filename.c_str(), O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
       if (file == -1) {
         throw ParseError("Can't open " + filename);
       }
@@ -169,6 +176,12 @@ namespace orc {
         closed = true;
       }
     }
+
+    void flush() override {  // asks the OS to commit written data for this file descriptor
+      if (!closed) {  // no-op once the stream has been closed
+        ::fsync(file);  // return value is ignored; under _MSC_VER fsync is #defined to _commit earlier in this file
+      }
+    }
   };
 
   FileOutputStream::~FileOutputStream() {
@@ -179,6 +192,6 @@ namespace orc {
   }
 
   std::unique_ptr<OutputStream> writeLocalFile(const std::string& path) {
-    return std::unique_ptr<OutputStream>(new FileOutputStream(path));
+    return std::make_unique<FileOutputStream>(path);  // the FileOutputStream constructor creates/truncates the file and throws ParseError if open() fails
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/RLE.cc b/contrib/libs/apache/orc/c++/src/RLE.cc
index 21f9082216..89aca6a10e 100644
--- a/contrib/libs/apache/orc/c++/src/RLE.cc
+++ b/contrib/libs/apache/orc/c++/src/RLE.cc
@@ -1,20 +1,20 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "RLEv1.hh"
 #include "RLEv2.hh"
@@ -30,52 +30,53 @@ namespace orc {
     // PASS
   }
 
-  std::unique_ptr<RleEncoder> createRleEncoder
-                         (std::unique_ptr<BufferedOutputStream> output,
-                          bool isSigned,
-                          RleVersion version,
-                          MemoryPool&,
-                          bool alignedBitpacking) {
+  std::unique_ptr<RleEncoder> createRleEncoder(std::unique_ptr<BufferedOutputStream> output,
+                                               bool isSigned, RleVersion version, MemoryPool&,  // the MemoryPool argument is unnamed and unused
+                                               bool alignedBitpacking) {
     switch (static_cast<int64_t>(version)) {
-    case RleVersion_1:
-      // We don't have std::make_unique() yet.
-      return std::unique_ptr<RleEncoder>(new RleEncoderV1(std::move(output),
-                                                          isSigned));
-    case RleVersion_2:
-      return std::unique_ptr<RleEncoder>(new RleEncoderV2(std::move(output),
-                                                            isSigned, alignedBitpacking));
-    default:
-      throw NotImplementedYet("Not implemented yet");
+      case RleVersion_1:
+        return std::make_unique<RleEncoderV1>(std::move(output), isSigned);  // the V1 encoder does not receive alignedBitpacking
+      case RleVersion_2:
+        return std::make_unique<RleEncoderV2>(std::move(output), isSigned, alignedBitpacking);
+      default:
+        throw NotImplementedYet("Not implemented yet");  // any other version value is rejected
     }
   }
 
-  std::unique_ptr<RleDecoder> createRleDecoder
-                         (std::unique_ptr<SeekableInputStream> input,
-                          bool isSigned,
-                          RleVersion version,
-                          MemoryPool& pool) {
+  std::unique_ptr<RleDecoder> createRleDecoder(std::unique_ptr<SeekableInputStream> input,
+                                               bool isSigned, RleVersion version, MemoryPool& pool,
+                                               ReaderMetrics* metrics) {  // metrics is forwarded unchanged to the decoder
     switch (static_cast<int64_t>(version)) {
-    case RleVersion_1:
-      // We don't have std::make_unique() yet.
-      return std::unique_ptr<RleDecoder>(new RleDecoderV1(std::move(input),
-                                                          isSigned));
-    case RleVersion_2:
-      return std::unique_ptr<RleDecoder>(new RleDecoderV2(std::move(input),
-                                                          isSigned, pool));
-    default:
-      throw NotImplementedYet("Not implemented yet");
+      case RleVersion_1:
+        return std::make_unique<RleDecoderV1>(std::move(input), isSigned, metrics);  // the V1 decoder does not receive the pool
+      case RleVersion_2:
+        return std::make_unique<RleDecoderV2>(std::move(input), isSigned, pool, metrics);
+      default:
+        throw NotImplementedYet("Not implemented yet");  // any other version value is rejected
     }
   }
 
-  void RleEncoder::add(const int64_t* data, uint64_t numValues,
-                         const char* notNull) {
+  template <typename T>  // shared implementation behind the int64_t/int32_t/int16_t overloads in this file
+  void RleEncoder::add(const T* data, uint64_t numValues, const char* notNull) {
     for (uint64_t i = 0; i < numValues; ++i) {
       if (!notNull || notNull[i]) {  // null notNull means every value is written; otherwise entries with a zero flag are skipped
-        write(data[i]);
+        write(static_cast<int64_t>(data[i]));  // widen to int64_t before handing the value to the virtual write()
       }
     }
   }
 
+  void RleEncoder::add(const int64_t* data, uint64_t numValues, const char* notNull) {
+    add<int64_t>(data, numValues, notNull);  // forwards to the templated implementation
+  }
+
+  void RleEncoder::add(const int32_t* data, uint64_t numValues, const char* notNull) {
+    add<int32_t>(data, numValues, notNull);  // forwards to the templated implementation
+  }
+
+  void RleEncoder::add(const int16_t* data, uint64_t numValues, const char* notNull) {
+    add<int16_t>(data, numValues, notNull);  // forwards to the templated implementation
+  }
+
   void RleEncoder::writeVslong(int64_t val) {  // writes a signed value through the unsigned writer
     writeVulong((val << 1) ^ (val >> 63));  // zigzag mapping: the sign ends up in bit 0 (0→0, -1→1, 1→2, ...)
   }
@@ -96,7 +97,7 @@ namespace orc {
   void RleEncoder::writeByte(char c) {
     if (bufferPosition == bufferLength) {
       int addedSize = 0;
-      if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) {
+      if (!outputStream->Next(reinterpret_cast<void**>(&buffer), &addedSize)) {
         throw std::bad_alloc();
       }
       bufferPosition = 0;
diff --git a/contrib/libs/apache/orc/c++/src/RLE.hh b/contrib/libs/apache/orc/c++/src/RLE.hh
index 6822bd812e..51f9b6f58a 100644
--- a/contrib/libs/apache/orc/c++/src/RLE.hh
+++ b/contrib/libs/apache/orc/c++/src/RLE.hh
@@ -35,20 +35,18 @@ namespace orc {
   }
 
   class RleEncoder {
-  public:
+   public:
     // must be non-inline!
     virtual ~RleEncoder();
 
-    RleEncoder(
-            std::unique_ptr<BufferedOutputStream> outStream,
-            bool hasSigned):
-            outputStream(std::move(outStream)),
-            bufferPosition(0),
-            bufferLength(0),
-            numLiterals(0),
-            isSigned(hasSigned),
-            buffer(nullptr){
-      //pass
+    RleEncoder(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned)
+        : outputStream(std::move(outStream)),  // takes ownership of the output stream
+          bufferPosition(0),
+          bufferLength(0),  // position == length, so the first writeByte() asks outputStream for a buffer
+          numLiterals(0),
+          isSigned(hasSigned),
+          buffer(nullptr) {  // no buffer until one is obtained from outputStream
+      // pass
     }
 
     /**
@@ -58,14 +56,19 @@ namespace orc {
      * @param notNull If the pointer is null, all values are read. If the
      *    pointer is not null, positions that are false are skipped.
      */
-    virtual void add(const int64_t* data, uint64_t numValues,
-                      const char* notNull);
+    template <typename T>
+    void add(const T* data, uint64_t numValues, const char* notNull);
 
+    virtual void add(const int64_t* data, uint64_t numValues, const char* notNull);
+
+    virtual void add(const int32_t* data, uint64_t numValues, const char* notNull);
+
+    virtual void add(const int16_t* data, uint64_t numValues, const char* notNull);
     /**
      * Get size of buffer used so far.
      */
     uint64_t getBufferSize() const {
-        return outputStream->getSize();
+      return outputStream->getSize();  // delegates to the underlying output stream
     }
 
     /**
@@ -81,7 +84,7 @@ namespace orc {
 
     virtual void write(int64_t val) = 0;
 
-  protected:
+   protected:
     std::unique_ptr<BufferedOutputStream> outputStream;
     size_t bufferPosition;
     size_t bufferLength;
@@ -98,10 +101,14 @@ namespace orc {
   };
 
   class RleDecoder {
-  public:
+   public:
     // must be non-inline!
     virtual ~RleDecoder();
 
+    RleDecoder(ReaderMetrics* _metrics) : metrics(_metrics) {  // keeps the pointer in the protected `metrics` member for subclasses
+      // pass
+    }
+
     /**
      * Seek to a particular spot.
      */
@@ -119,8 +126,14 @@ namespace orc {
      * @param notNull If the pointer is null, all values are read. If the
      *    pointer is not null, positions that are false are skipped.
      */
-    virtual void next(int64_t* data, uint64_t numValues,
-                      const char* notNull) = 0;
+    virtual void next(int64_t* data, uint64_t numValues, const char* notNull) = 0;
+
+    virtual void next(int32_t* data, uint64_t numValues, const char* notNull) = 0;
+
+    virtual void next(int16_t* data, uint64_t numValues, const char* notNull) = 0;
+
+   protected:
+    ReaderMetrics* metrics;
   };
 
   /**
@@ -130,12 +143,9 @@ namespace orc {
    * @param version version of RLE encoding to do
    * @param pool memory pool to use for allocation
    */
-  std::unique_ptr<RleEncoder> createRleEncoder
-                         (std::unique_ptr<BufferedOutputStream> output,
-                          bool isSigned,
-                          RleVersion version,
-                          MemoryPool& pool,
-                          bool alignedBitpacking);
+  std::unique_ptr<RleEncoder> createRleEncoder(std::unique_ptr<BufferedOutputStream> output,
+                                               bool isSigned, RleVersion version, MemoryPool& pool,
+                                               bool alignedBitpacking);
 
   /**
    * Create an RLE decoder.
@@ -144,11 +154,9 @@ namespace orc {
    * @param version version of RLE decoding to do
    * @param pool memory pool to use for allocation
    */
-  std::unique_ptr<RleDecoder> createRleDecoder
-                      (std::unique_ptr<SeekableInputStream> input,
-                       bool isSigned,
-                       RleVersion version,
-                       MemoryPool& pool);
+  std::unique_ptr<RleDecoder> createRleDecoder(std::unique_ptr<SeekableInputStream> input,
+                                               bool isSigned, RleVersion version, MemoryPool& pool,
+                                               ReaderMetrics* metrics);
 
 }  // namespace orc
 
diff --git a/contrib/libs/apache/orc/c++/src/RLEV2Util.cc b/contrib/libs/apache/orc/c++/src/RLEV2Util.cc
index 12e2d057cd..be2c6e2875 100644
--- a/contrib/libs/apache/orc/c++/src/RLEV2Util.cc
+++ b/contrib/libs/apache/orc/c++/src/RLEV2Util.cc
@@ -1,19 +1,20 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
- * distributed with option work for additional information
- * regarding copyright ownership.  The ASF licenses option file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use option file except in compliance
+ * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #include "RLEV2Util.hh"
@@ -21,50 +22,44 @@
 namespace orc {
 
   // Map FBS enum to bit width value.
-  const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {
-    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
-    26, 28, 30, 32, 40, 48, 56, 64
-  };
+  const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  // every width 1..24 ...
+                                                         12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                                         23, 24, 26, 28, 30, 32, 40, 48, 56, 64};  // ... then 26, 28, 30, 32, 40, 48, 56, 64
 
   // Map bit length i to closest fixed bit width that can contain i bits.
   const uint8_t ClosestFixedBitsMap[65] = {  // indexed by bit length 0..64; length 0 maps to width 1
-    1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
-    26, 26, 28, 28, 30, 30, 32, 32,
-    40, 40, 40, 40, 40, 40, 40, 40,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    56, 56, 56, 56, 56, 56, 56, 56,
-    64, 64, 64, 64, 64, 64, 64, 64
-  };
+      1,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,  // lengths up to 24 map to themselves
+      22, 23, 24, 26, 26, 28, 28, 30, 30, 32, 32, 40, 40, 40, 40, 40, 40, 40, 40, 48, 48, 48,  // above 24 the value rounds up to 26, 28, 30, 32 ...
+      48, 48, 48, 48, 48, 56, 56, 56, 56, 56, 56, 56, 56, 64, 64, 64, 64, 64, 64, 64, 64};  // ... then to 40, 48, 56, 64
 
   // Map bit length i to closest aligned fixed bit width that can contain i bits.
   const uint8_t ClosestAlignedFixedBitsMap[65] = {  // indexed by bit length 0..64; values are 1, 2, 4 or a multiple of 8
-      1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24,
-      32, 32, 32, 32, 32, 32, 32, 32,
-      40, 40, 40, 40, 40, 40, 40, 40,
-      48, 48, 48, 48, 48, 48, 48, 48,
-      56, 56, 56, 56, 56, 56, 56, 56,
-      64, 64, 64, 64, 64, 64, 64, 64
-  };
+      1,  1,  2,  4,  4,  8,  8,  8,  8,  16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24,  // e.g. length 3 -> 4, lengths 5..8 -> 8
+      24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 40, 40, 40, 48, 48, 48,
+      48, 48, 48, 48, 48, 56, 56, 56, 56, 56, 56, 56, 56, 64, 64, 64, 64, 64, 64, 64, 64};
 
   // Map bit width to FBS enum.
   const uint8_t BitWidthToFBSMap[65] = {  // indexed by bit width 0..64; widths with no enumerator of their own use the next larger one
-      FixedBitSizes::ONE, FixedBitSizes::ONE, FixedBitSizes::TWO, FixedBitSizes::THREE, FixedBitSizes::FOUR,
-      FixedBitSizes::FIVE, FixedBitSizes::SIX, FixedBitSizes::SEVEN, FixedBitSizes::EIGHT,
-      FixedBitSizes::NINE, FixedBitSizes::TEN, FixedBitSizes::ELEVEN, FixedBitSizes::TWELVE,
-      FixedBitSizes::THIRTEEN, FixedBitSizes::FOURTEEN, FixedBitSizes::FIFTEEN, FixedBitSizes::SIXTEEN,
-      FixedBitSizes::SEVENTEEN, FixedBitSizes::EIGHTEEN, FixedBitSizes::NINETEEN, FixedBitSizes::TWENTY,
-      FixedBitSizes::TWENTYONE, FixedBitSizes::TWENTYTWO, FixedBitSizes::TWENTYTHREE, FixedBitSizes::TWENTYFOUR,
-      FixedBitSizes::TWENTYSIX, FixedBitSizes::TWENTYSIX,
-      FixedBitSizes::TWENTYEIGHT, FixedBitSizes::TWENTYEIGHT,
-      FixedBitSizes::THIRTY, FixedBitSizes::THIRTY,
-      FixedBitSizes::THIRTYTWO, FixedBitSizes::THIRTYTWO,
-      FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY,
-      FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY,
-      FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
-      FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
-      FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
-      FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
-      FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR,
-      FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR
-  };
-}
+      FixedBitSizes::ONE,         FixedBitSizes::ONE,         FixedBitSizes::TWO,  // width 0 shares the entry for width 1
+      FixedBitSizes::THREE,       FixedBitSizes::FOUR,        FixedBitSizes::FIVE,
+      FixedBitSizes::SIX,         FixedBitSizes::SEVEN,       FixedBitSizes::EIGHT,
+      FixedBitSizes::NINE,        FixedBitSizes::TEN,         FixedBitSizes::ELEVEN,
+      FixedBitSizes::TWELVE,      FixedBitSizes::THIRTEEN,    FixedBitSizes::FOURTEEN,
+      FixedBitSizes::FIFTEEN,     FixedBitSizes::SIXTEEN,     FixedBitSizes::SEVENTEEN,
+      FixedBitSizes::EIGHTEEN,    FixedBitSizes::NINETEEN,    FixedBitSizes::TWENTY,
+      FixedBitSizes::TWENTYONE,   FixedBitSizes::TWENTYTWO,   FixedBitSizes::TWENTYTHREE,
+      FixedBitSizes::TWENTYFOUR,  FixedBitSizes::TWENTYSIX,   FixedBitSizes::TWENTYSIX,  // widths 25 and 26 share TWENTYSIX
+      FixedBitSizes::TWENTYEIGHT, FixedBitSizes::TWENTYEIGHT, FixedBitSizes::THIRTY,
+      FixedBitSizes::THIRTY,      FixedBitSizes::THIRTYTWO,   FixedBitSizes::THIRTYTWO,
+      FixedBitSizes::FORTY,       FixedBitSizes::FORTY,       FixedBitSizes::FORTY,  // widths 33..40 share FORTY
+      FixedBitSizes::FORTY,       FixedBitSizes::FORTY,       FixedBitSizes::FORTY,
+      FixedBitSizes::FORTY,       FixedBitSizes::FORTY,       FixedBitSizes::FORTYEIGHT,
+      FixedBitSizes::FORTYEIGHT,  FixedBitSizes::FORTYEIGHT,  FixedBitSizes::FORTYEIGHT,
+      FixedBitSizes::FORTYEIGHT,  FixedBitSizes::FORTYEIGHT,  FixedBitSizes::FORTYEIGHT,
+      FixedBitSizes::FORTYEIGHT,  FixedBitSizes::FIFTYSIX,    FixedBitSizes::FIFTYSIX,
+      FixedBitSizes::FIFTYSIX,    FixedBitSizes::FIFTYSIX,    FixedBitSizes::FIFTYSIX,
+      FixedBitSizes::FIFTYSIX,    FixedBitSizes::FIFTYSIX,    FixedBitSizes::FIFTYSIX,
+      FixedBitSizes::SIXTYFOUR,   FixedBitSizes::SIXTYFOUR,   FixedBitSizes::SIXTYFOUR,
+      FixedBitSizes::SIXTYFOUR,   FixedBitSizes::SIXTYFOUR,   FixedBitSizes::SIXTYFOUR,
+      FixedBitSizes::SIXTYFOUR,   FixedBitSizes::SIXTYFOUR};  // widths 57..64 share SIXTYFOUR
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/RLEV2Util.hh b/contrib/libs/apache/orc/c++/src/RLEV2Util.hh
index 95a6826eaa..89c6913400 100644
--- a/contrib/libs/apache/orc/c++/src/RLEV2Util.hh
+++ b/contrib/libs/apache/orc/c++/src/RLEV2Util.hh
@@ -1,20 +1,20 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef ORC_RLEV2UTIL_HH
 #define ORC_RLEV2UTIL_HH
@@ -74,8 +74,8 @@ namespace orc {
   }
 
   inline uint32_t RleEncoderV2::getOpCode(EncodingType encoding) {
-    return static_cast<uint32_t >(encoding << 6);
+    return static_cast<uint32_t>(encoding << 6);
   }
-}
+}  // namespace orc
 
-#endif //ORC_RLEV2UTIL_HH
+#endif  // ORC_RLEV2UTIL_HH
diff --git a/contrib/libs/apache/orc/c++/src/RLEv1.cc b/contrib/libs/apache/orc/c++/src/RLEv1.cc
index fe333978db..b221e8b8aa 100644
--- a/contrib/libs/apache/orc/c++/src/RLEv1.cc
+++ b/contrib/libs/apache/orc/c++/src/RLEv1.cc
@@ -16,287 +16,295 @@
  * limitations under the License.
  */
 
+#include "RLEv1.hh"
 #include "Adaptor.hh"
 #include "Compression.hh"
+#include "Utils.hh"
 #include "orc/Exceptions.hh"
-#include "RLEv1.hh"
 
 #include <algorithm>
 
 namespace orc {
 
-const uint64_t MINIMUM_REPEAT = 3;
-const uint64_t MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT;
+  const uint64_t MINIMUM_REPEAT = 3;
+  const uint64_t MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT;
 
-const int64_t BASE_128_MASK = 0x7f;
+  const int64_t BASE_128_MASK = 0x7f;
 
-const int64_t MAX_DELTA = 127;
-const int64_t MIN_DELTA = -128;
-const uint64_t MAX_LITERAL_SIZE = 128;
+  const int64_t MAX_DELTA = 127;
+  const int64_t MIN_DELTA = -128;
+  const uint64_t MAX_LITERAL_SIZE = 128;
 
-RleEncoderV1::RleEncoderV1(
-                          std::unique_ptr<BufferedOutputStream> outStream,
-                          bool hasSigned):
-                          RleEncoder(std::move(outStream), hasSigned) {
-  literals = new int64_t[MAX_LITERAL_SIZE];
-  delta = 0;
-  repeat = false;
-  tailRunLength = 0;
-}
+  RleEncoderV1::RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned)
+      : RleEncoder(std::move(outStream), hasSigned) {
+    literals = new int64_t[MAX_LITERAL_SIZE];
+    delta = 0;
+    repeat = false;
+    tailRunLength = 0;
+  }
 
-RleEncoderV1::~RleEncoderV1() {
-  delete [] literals;
-}
+  RleEncoderV1::~RleEncoderV1() {
+    delete[] literals;
+  }
 
-void RleEncoderV1::writeValues() {
-  if (numLiterals != 0) {
-    if (repeat) {
-      writeByte(static_cast<char>
-                (static_cast<uint64_t>(numLiterals) - MINIMUM_REPEAT));
-      writeByte(static_cast<char>(delta));
-      if (isSigned) {
-        writeVslong(literals[0]);
-      } else {
-        writeVulong(literals[0]);
-      }
-    } else {
-      writeByte(static_cast<char>(-numLiterals));
-      for(size_t i=0; i < numLiterals; ++i) {
+  void RleEncoderV1::writeValues() {
+    if (numLiterals != 0) {
+      if (repeat) {
+        writeByte(static_cast<char>(static_cast<uint64_t>(numLiterals) - MINIMUM_REPEAT));
+        writeByte(static_cast<char>(delta));
         if (isSigned) {
-          writeVslong(literals[i]);
+          writeVslong(literals[0]);
         } else {
-          writeVulong(literals[i]);
+          writeVulong(literals[0]);
+        }
+      } else {
+        writeByte(static_cast<char>(-numLiterals));
+        for (size_t i = 0; i < numLiterals; ++i) {
+          if (isSigned) {
+            writeVslong(literals[i]);
+          } else {
+            writeVulong(literals[i]);
+          }
         }
       }
+      repeat = false;
+      numLiterals = 0;
+      tailRunLength = 0;
     }
-    repeat = false;
-    numLiterals = 0;
-    tailRunLength = 0;
   }
-}
 
-uint64_t RleEncoderV1::flush() {
-  writeValues();
-  outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition));
-  uint64_t dataSize = outputStream->flush();
-  bufferLength = bufferPosition = 0;
-  return dataSize;
-}
+  uint64_t RleEncoderV1::flush() {
+    writeValues();
+    outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition));
+    uint64_t dataSize = outputStream->flush();
+    bufferLength = bufferPosition = 0;
+    return dataSize;
+  }
 
-void RleEncoderV1::write(int64_t value) {
-  if (numLiterals == 0) {
-    literals[numLiterals++] = value;
-    tailRunLength = 1;
-  } else if (repeat) {
-    if (value == literals[0] + delta * static_cast<int64_t>(numLiterals)) {
-      numLiterals += 1;
-      if (numLiterals == MAXIMUM_REPEAT) {
-        writeValues();
-      }
-    } else {
-      writeValues();
+  void RleEncoderV1::write(int64_t value) {
+    if (numLiterals == 0) {
       literals[numLiterals++] = value;
       tailRunLength = 1;
-    }
-  } else {
-    if (tailRunLength == 1) {
-      delta = value - literals[numLiterals - 1];
-      if (delta < MIN_DELTA || delta > MAX_DELTA) {
-        tailRunLength = 1;
+    } else if (repeat) {
+      if (value == literals[0] + delta * static_cast<int64_t>(numLiterals)) {
+        numLiterals += 1;
+        if (numLiterals == MAXIMUM_REPEAT) {
+          writeValues();
+        }
       } else {
-        tailRunLength = 2;
+        writeValues();
+        literals[numLiterals++] = value;
+        tailRunLength = 1;
       }
-    } else if (value == literals[numLiterals - 1] + delta) {
-      tailRunLength += 1;
     } else {
-      delta = value - literals[numLiterals - 1];
-      if (delta < MIN_DELTA || delta > MAX_DELTA) {
-        tailRunLength = 1;
+      if (tailRunLength == 1) {
+        delta = value - literals[numLiterals - 1];
+        if (delta < MIN_DELTA || delta > MAX_DELTA) {
+          tailRunLength = 1;
+        } else {
+          tailRunLength = 2;
+        }
+      } else if (value == literals[numLiterals - 1] + delta) {
+        tailRunLength += 1;
       } else {
-        tailRunLength = 2;
+        delta = value - literals[numLiterals - 1];
+        if (delta < MIN_DELTA || delta > MAX_DELTA) {
+          tailRunLength = 1;
+        } else {
+          tailRunLength = 2;
+        }
       }
-    }
-    if (tailRunLength == MINIMUM_REPEAT) {
-      if (numLiterals + 1 == MINIMUM_REPEAT) {
-        repeat = true;
-        numLiterals += 1;
+      if (tailRunLength == MINIMUM_REPEAT) {
+        if (numLiterals + 1 == MINIMUM_REPEAT) {
+          repeat = true;
+          numLiterals += 1;
+        } else {
+          numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1);
+          int64_t base = literals[numLiterals];
+          writeValues();
+          literals[0] = base;
+          repeat = true;
+          numLiterals = MINIMUM_REPEAT;
+        }
       } else {
-        numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1);
-        int64_t base = literals[numLiterals];
-        writeValues();
-        literals[0] = base;
-        repeat = true;
-        numLiterals = MINIMUM_REPEAT;
+        literals[numLiterals++] = value;
+        if (numLiterals == MAX_LITERAL_SIZE) {
+          writeValues();
+        }
       }
-    } else {
-      literals[numLiterals++] = value;
-      if (numLiterals == MAX_LITERAL_SIZE) {
-        writeValues();
+    }
+  }
+
+  signed char RleDecoderV1::readByte() {
+    SCOPED_MINUS_STOPWATCH(metrics, DecodingLatencyUs);
+    if (bufferStart == bufferEnd) {
+      int bufferLength;
+      const void* bufferPointer;
+      if (!inputStream->Next(&bufferPointer, &bufferLength)) {
+        throw ParseError("bad read in readByte");
       }
+      bufferStart = static_cast<const char*>(bufferPointer);
+      bufferEnd = bufferStart + bufferLength;
     }
+    return static_cast<signed char>(*(bufferStart++));
   }
-}
 
-signed char RleDecoderV1::readByte() {
-  if (bufferStart == bufferEnd) {
-    int bufferLength;
-    const void* bufferPointer;
-    if (!inputStream->Next(&bufferPointer, &bufferLength)) {
-      throw ParseError("bad read in readByte");
+  uint64_t RleDecoderV1::readLong() {
+    uint64_t result = 0;
+    int64_t offset = 0;
+    signed char ch = readByte();
+    if (ch >= 0) {
+      result = static_cast<uint64_t>(ch);
+    } else {
+      result = static_cast<uint64_t>(ch) & BASE_128_MASK;
+      while ((ch = readByte()) < 0) {
+        offset += 7;
+        result |= (static_cast<uint64_t>(ch) & BASE_128_MASK) << offset;
+      }
+      result |= static_cast<uint64_t>(ch) << (offset + 7);
     }
-    bufferStart = static_cast<const char*>(bufferPointer);
-    bufferEnd = bufferStart + bufferLength;
+    return result;
   }
-  return *(bufferStart++);
-}
 
-uint64_t RleDecoderV1::readLong() {
-  uint64_t result = 0;
-  int64_t offset = 0;
-  signed char ch = readByte();
-  if (ch >= 0) {
-    result = static_cast<uint64_t>(ch);
-  } else {
-    result = static_cast<uint64_t>(ch) & BASE_128_MASK;
-    while ((ch = readByte()) < 0) {
-      offset += 7;
-      result |= (static_cast<uint64_t>(ch) & BASE_128_MASK) << offset;
+  void RleDecoderV1::skipLongs(uint64_t numValues) {
+    while (numValues > 0) {
+      if (readByte() >= 0) {
+        --numValues;
+      }
     }
-    result |= static_cast<uint64_t>(ch) << (offset + 7);
   }
-  return result;
-}
 
-void RleDecoderV1::skipLongs(uint64_t numValues) {
-  while (numValues > 0) {
-    if (readByte() >= 0) {
-      --numValues;
+  void RleDecoderV1::readHeader() {
+    signed char ch = readByte();
+    if (ch < 0) {
+      remainingValues = static_cast<uint64_t>(-ch);
+      repeating = false;
+    } else {
+      remainingValues = static_cast<uint64_t>(ch) + MINIMUM_REPEAT;
+      repeating = true;
+      delta = readByte();
+      value = isSigned ? unZigZag(readLong()) : static_cast<int64_t>(readLong());
     }
   }
-}
 
-void RleDecoderV1::readHeader() {
-  signed char ch = readByte();
-  if (ch < 0) {
-    remainingValues = static_cast<uint64_t>(-ch);
+  void RleDecoderV1::reset() {
+    remainingValues = 0;
+    value = 0;
+    bufferStart = nullptr;
+    bufferEnd = nullptr;
+    delta = 0;
     repeating = false;
-  } else {
-    remainingValues = static_cast<uint64_t>(ch) + MINIMUM_REPEAT;
-    repeating = true;
-    delta = readByte();
-    value = isSigned
-        ? unZigZag(readLong())
-        : static_cast<int64_t>(readLong());
   }
-}
-
-RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
-                           bool hasSigned)
-    : inputStream(std::move(input)),
-      isSigned(hasSigned),
-      remainingValues(0),
-      value(0),
-      bufferStart(nullptr),
-      bufferEnd(bufferStart),
-      delta(0),
-      repeating(false) {
-}
 
-void RleDecoderV1::seek(PositionProvider& location) {
-  // move the input stream
-  inputStream->seek(location);
-  // force a re-read from the stream
-  bufferEnd = bufferStart;
-  // read a new header
-  readHeader();
-  // skip ahead the given number of records
-  skip(location.next());
-}
+  RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input, bool hasSigned,
+                             ReaderMetrics* _metrics)
+      : RleDecoder(_metrics), inputStream(std::move(input)), isSigned(hasSigned) {
+    reset();
+  }
 
-void RleDecoderV1::skip(uint64_t numValues) {
-  while (numValues > 0) {
-    if (remainingValues == 0) {
-      readHeader();
-    }
-    uint64_t count = std::min(numValues, remainingValues);
-    remainingValues -= count;
-    numValues -= count;
-    if (repeating) {
-      value += delta * static_cast<int64_t>(count);
-    } else {
-      skipLongs(count);
-    }
+  void RleDecoderV1::seek(PositionProvider& location) {
+    // move the input stream
+    inputStream->seek(location);
+    // reset the decoder state; readHeader() is then called lazily by skip()/next()
+    reset();
+    // skip ahead the given number of records
+    skip(location.next());
   }
-}
 
-void RleDecoderV1::next(int64_t* const data,
-                        const uint64_t numValues,
-                        const char* const notNull) {
-  uint64_t position = 0;
-  // skipNulls()
-  if (notNull) {
-    // Skip over null values.
-    while (position < numValues && !notNull[position]) {
-      ++position;
+  void RleDecoderV1::skip(uint64_t numValues) {
+    while (numValues > 0) {
+      if (remainingValues == 0) {
+        readHeader();
+      }
+      uint64_t count = std::min(numValues, remainingValues);
+      remainingValues -= count;
+      numValues -= count;
+      if (repeating) {
+        value += delta * static_cast<int64_t>(count);
+      } else {
+        skipLongs(count);
+      }
     }
   }
-  while (position < numValues) {
-    // If we are out of values, read more.
-    if (remainingValues == 0) {
-      readHeader();
+
+  template <typename T>
+  void RleDecoderV1::next(T* const data, const uint64_t numValues, const char* const notNull) {
+    SCOPED_STOPWATCH(metrics, DecodingLatencyUs, DecodingCall);
+    uint64_t position = 0;
+    // skipNulls()
+    if (notNull) {
+      // Skip over null values.
+      while (position < numValues && !notNull[position]) {
+        ++position;
+      }
     }
-    // How many do we read out of this block?
-    uint64_t count = std::min(numValues - position, remainingValues);
-    uint64_t consumed = 0;
-    if (repeating) {
-      if (notNull) {
-        for (uint64_t i = 0; i < count; ++i) {
-          if (notNull[position + i]) {
-            data[position + i] = value + static_cast<int64_t>(consumed) * delta;
-            consumed += 1;
-          }
-        }
-      } else {
-        for (uint64_t i = 0; i < count; ++i) {
-          data[position + i] = value + static_cast<int64_t>(i) * delta;
-        }
-        consumed = count;
+    while (position < numValues) {
+      // If we are out of values, read more.
+      if (remainingValues == 0) {
+        readHeader();
       }
-      value += static_cast<int64_t>(consumed) * delta;
-    } else {
-      if (notNull) {
-        for (uint64_t i = 0 ; i < count; ++i) {
-          if (notNull[position + i]) {
-            data[position + i] = isSigned
-                ? unZigZag(readLong())
-                : static_cast<int64_t>(readLong());
-            ++consumed;
+      // How many do we read out of this block?
+      uint64_t count = std::min(numValues - position, remainingValues);
+      uint64_t consumed = 0;
+      if (repeating) {
+        if (notNull) {
+          for (uint64_t i = 0; i < count; ++i) {
+            if (notNull[position + i]) {
+              data[position + i] = static_cast<T>(value + static_cast<int64_t>(consumed) * delta);
+              consumed += 1;
+            }
+          }
+        } else {
+          for (uint64_t i = 0; i < count; ++i) {
+            data[position + i] = static_cast<T>(value + static_cast<int64_t>(i) * delta);
           }
+          consumed = count;
         }
+        value += static_cast<int64_t>(consumed) * delta;
       } else {
-        if (isSigned) {
+        if (notNull) {
           for (uint64_t i = 0; i < count; ++i) {
-            data[position + i] = unZigZag(readLong());
+            if (notNull[position + i]) {
+              data[position + i] =
+                  isSigned ? static_cast<T>(unZigZag(readLong())) : static_cast<T>(readLong());
+              ++consumed;
+            }
           }
         } else {
-          for (uint64_t i = 0; i < count; ++i) {
-            data[position + i] = static_cast<int64_t>(readLong());
+          if (isSigned) {
+            for (uint64_t i = 0; i < count; ++i) {
+              data[position + i] = static_cast<T>(unZigZag(readLong()));
+            }
+          } else {
+            for (uint64_t i = 0; i < count; ++i) {
+              data[position + i] = static_cast<T>(readLong());
+            }
           }
+          consumed = count;
         }
-        consumed = count;
       }
-    }
-    remainingValues -= consumed;
-    position += count;
+      remainingValues -= consumed;
+      position += count;
 
-    // skipNulls()
-    if (notNull) {
-      // Skip over null values.
-      while (position < numValues && !notNull[position]) {
-        ++position;
+      // skipNulls()
+      if (notNull) {
+        // Skip over null values.
+        while (position < numValues && !notNull[position]) {
+          ++position;
+        }
       }
     }
   }
-}
 
+  void RleDecoderV1::next(int64_t* data, uint64_t numValues, const char* notNull) {
+    next<int64_t>(data, numValues, notNull);
+  }
+
+  void RleDecoderV1::next(int32_t* data, uint64_t numValues, const char* notNull) {
+    next<int32_t>(data, numValues, notNull);
+  }
+
+  void RleDecoderV1::next(int16_t* data, uint64_t numValues, const char* notNull) {
+    next<int16_t>(data, numValues, notNull);
+  }
 }  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/RLEv1.hh b/contrib/libs/apache/orc/c++/src/RLEv1.hh
index 8e31d70873..fbe6b0f9c6 100644
--- a/contrib/libs/apache/orc/c++/src/RLEv1.hh
+++ b/contrib/libs/apache/orc/c++/src/RLEv1.hh
@@ -1,20 +1,20 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef ORC_RLEV1_HH
 #define ORC_RLEV1_HH
@@ -26,11 +26,10 @@
 
 namespace orc {
 
-class RleEncoderV1 : public RleEncoder {
-public:
-    RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream,
-                 bool hasSigned);
-    ~RleEncoderV1() override ;
+  class RleEncoderV1 : public RleEncoder {
+   public:
+    RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned);
+    ~RleEncoderV1() override;
 
     /**
      * Flushing underlying BufferedOutputStream
@@ -39,36 +38,41 @@ public:
 
     void write(int64_t val) override;
 
-private:
+   private:
     int64_t delta;
     bool repeat;
     uint64_t tailRunLength;
 
     void writeValues();
-};
+  };
 
-class RleDecoderV1 : public RleDecoder {
-public:
-    RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
-                 bool isSigned);
+  class RleDecoderV1 : public RleDecoder {
+   public:
+    RleDecoderV1(std::unique_ptr<SeekableInputStream> input, bool isSigned, ReaderMetrics* metrics);
 
     /**
-    * Seek to a particular spot.
-    */
+     * Seek to a particular spot.
+     */
     void seek(PositionProvider&) override;
 
     /**
-    * Seek over a given number of values.
-    */
+     * Skip over a given number of values.
+     */
     void skip(uint64_t numValues) override;
 
     /**
-    * Read a number of values into the batch.
-    */
-    void next(int64_t* data, uint64_t numValues,
-              const char* notNull) override;
+     * Read a number of values into the batch.
+     */
+    template <typename T>
+    void next(T* data, uint64_t numValues, const char* notNull);
+
+    void next(int64_t* data, uint64_t numValues, const char* notNull) override;
 
-private:
+    void next(int32_t* data, uint64_t numValues, const char* notNull) override;
+
+    void next(int16_t* data, uint64_t numValues, const char* notNull) override;
+
+   private:
     inline signed char readByte();
 
     inline void readHeader();
@@ -77,15 +81,17 @@ private:
 
     inline void skipLongs(uint64_t numValues);
 
+    inline void reset();
+
     const std::unique_ptr<SeekableInputStream> inputStream;
     const bool isSigned;
     uint64_t remainingValues;
     int64_t value;
-    const char *bufferStart;
-    const char *bufferEnd;
+    const char* bufferStart;
+    const char* bufferEnd;
     int64_t delta;
     bool repeating;
-};
+  };
 }  // namespace orc
 
 #endif  // ORC_RLEV1_HH
diff --git a/contrib/libs/apache/orc/c++/src/RLEv2.hh b/contrib/libs/apache/orc/c++/src/RLEv2.hh
index b1e68fb125..1cee59d0a6 100644
--- a/contrib/libs/apache/orc/c++/src/RLEv2.hh
+++ b/contrib/libs/apache/orc/c++/src/RLEv2.hh
@@ -1,27 +1,27 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef ORC_RLEV2_HH
 #define ORC_RLEV2_HH
 
 #include "Adaptor.hh"
-#include "orc/Exceptions.hh"
 #include "RLE.hh"
+#include "orc/Exceptions.hh"
 
 #include <vector>
 
@@ -30,46 +30,76 @@
 #define HIST_LEN 32
 namespace orc {
 
-struct FixedBitSizes {
+  struct FixedBitSizes {
     enum FBS {
-        ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
-        THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
-        TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
-        TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR, SIZE
+      ONE = 0,
+      TWO,
+      THREE,
+      FOUR,
+      FIVE,
+      SIX,
+      SEVEN,
+      EIGHT,
+      NINE,
+      TEN,
+      ELEVEN,
+      TWELVE,
+      THIRTEEN,
+      FOURTEEN,
+      FIFTEEN,
+      SIXTEEN,
+      SEVENTEEN,
+      EIGHTEEN,
+      NINETEEN,
+      TWENTY,
+      TWENTYONE,
+      TWENTYTWO,
+      TWENTYTHREE,
+      TWENTYFOUR,
+      TWENTYSIX,
+      TWENTYEIGHT,
+      THIRTY,
+      THIRTYTWO,
+      FORTY,
+      FORTYEIGHT,
+      FIFTYSIX,
+      SIXTYFOUR,
+      SIZE
     };
-};
-
-enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 };
-
-struct EncodingOption {
-  EncodingType encoding;
-  int64_t fixedDelta;
-  int64_t gapVsPatchListCount;
-  int64_t zigzagLiteralsCount;
-  int64_t baseRedLiteralsCount;
-  int64_t adjDeltasCount;
-  uint32_t zzBits90p;
-  uint32_t zzBits100p;
-  uint32_t brBits95p;
-  uint32_t brBits100p;
-  uint32_t bitsDeltaMax;
-  uint32_t patchWidth;
-  uint32_t patchGapWidth;
-  uint32_t patchLength;
-  int64_t min;
-  bool isFixedDelta;
-};
-
-class RleEncoderV2 : public RleEncoder {
-public:
-    RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned, bool alignBitPacking = true);
+  };
+
+  enum EncodingType { SHORT_REPEAT = 0, DIRECT = 1, PATCHED_BASE = 2, DELTA = 3 };
+
+  struct EncodingOption {
+    EncodingType encoding;
+    int64_t fixedDelta;
+    int64_t gapVsPatchListCount;
+    int64_t zigzagLiteralsCount;
+    int64_t baseRedLiteralsCount;
+    int64_t adjDeltasCount;
+    uint32_t zzBits90p;
+    uint32_t zzBits100p;
+    uint32_t brBits95p;
+    uint32_t brBits100p;
+    uint32_t bitsDeltaMax;
+    uint32_t patchWidth;
+    uint32_t patchGapWidth;
+    uint32_t patchLength;
+    int64_t min;
+    bool isFixedDelta;
+  };
+
+  class RleEncoderV2 : public RleEncoder {
+   public:
+    RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned,
+                 bool alignBitPacking = true);
 
     ~RleEncoderV2() override {
-      delete [] literals;
-      delete [] gapVsPatchList;
-      delete [] zigzagLiterals;
-      delete [] baseRedLiterals;
-      delete [] adjDeltas;
+      delete[] literals;
+      delete[] gapVsPatchList;
+      delete[] zigzagLiterals;
+      delete[] baseRedLiterals;
+      delete[] adjDeltas;
     }
     /**
      * Flushing underlying BufferedOutputStream
@@ -78,20 +108,19 @@ public:
 
     void write(int64_t val) override;
 
-private:
-
+   private:
     const bool alignedBitPacking;
     uint32_t fixedRunLength;
     uint32_t variableRunLength;
     int64_t prevDelta;
     int32_t histgram[HIST_LEN];
 
-    // The four list below should actually belong to EncodingOption since it only holds temporal values in write(int64_t val),
-    // it is move here for performance consideration.
+    // The four lists below logically belong to EncodingOption, since they only hold temporary
+    // values in write(int64_t val); they were moved here for performance reasons.
     int64_t* gapVsPatchList;
-    int64_t*  zigzagLiterals;
-    int64_t*  baseRedLiterals;
-    int64_t*  adjDeltas;
+    int64_t* zigzagLiterals;
+    int64_t* baseRedLiterals;
+    int64_t* adjDeltas;
 
     uint32_t getOpCode(EncodingType encoding);
     int64_t* prepareForDirectOrPatchedBase(EncodingOption& option);
@@ -106,97 +135,155 @@ private:
     void writeDirectValues(EncodingOption& option);
     void writePatchedBasedValues(EncodingOption& option);
     void writeDeltaValues(EncodingOption& option);
-    uint32_t percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist = false);
-};
-
-class RleDecoderV2 : public RleDecoder {
-public:
-  RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
-               bool isSigned, MemoryPool& pool);
-
-  /**
-  * Seek to a particular spot.
-  */
-  void seek(PositionProvider&) override;
-
-  /**
-  * Seek over a given number of values.
-  */
-  void skip(uint64_t numValues) override;
-
-  /**
-  * Read a number of values into the batch.
-  */
-  void next(int64_t* data, uint64_t numValues,
-            const char* notNull) override;
-
-private:
-
-  /**
-   * Decode the next gap and patch from 'unpackedPatch' and update the index on it.
-   * Used by PATCHED_BASE.
-   *
-   * @param patchBitSize  bit size of the patch value
-   * @param patchMask     mask for the patch value
-   * @param resGap        result of gap
-   * @param resPatch      result of patch
-   * @param patchIdx      current index in the 'unpackedPatch' buffer
-   */
-  void adjustGapAndPatch(uint32_t patchBitSize, int64_t patchMask,
-                         int64_t* resGap, int64_t* resPatch, uint64_t* patchIdx);
-
-  void resetReadLongs() {
-    bitsLeft = 0;
-    curByte = 0;
-  }
+    uint32_t percentileBits(int64_t* data, size_t offset, size_t length, double p,
+                            bool reuseHist = false);
+  };
 
-  void resetRun() {
-    resetReadLongs();
-  }
+  class RleDecoderV2 : public RleDecoder {
+   public:
+    RleDecoderV2(std::unique_ptr<SeekableInputStream> input, bool isSigned, MemoryPool& pool,
+                 ReaderMetrics* metrics);
+
+    /**
+     * Seek to a particular spot.
+     */
+    void seek(PositionProvider&) override;
+
+    /**
+     * Seek over a given number of values.
+     */
+    void skip(uint64_t numValues) override;
+
+    /**
+     * Read a number of values into the batch.
+     */
+    template <typename T>
+    void next(T* data, uint64_t numValues, const char* notNull);
+
+    void next(int64_t* data, uint64_t numValues, const char* notNull) override;
+
+    void next(int32_t* data, uint64_t numValues, const char* notNull) override;
+
+    void next(int16_t* data, uint64_t numValues, const char* notNull) override;
+
+    unsigned char readByte();
+
+    void setBufStart(const char* start) {
+      bufferStart = const_cast<char*>(start);
+    }
 
-  unsigned char readByte();
-
-  int64_t readLongBE(uint64_t bsz);
-  int64_t readVslong();
-  uint64_t readVulong();
-  void readLongs(int64_t *data, uint64_t offset, uint64_t len, uint64_t fbs);
-  void plainUnpackLongs(int64_t *data, uint64_t offset, uint64_t len, uint64_t fbs);
-
-  void unrolledUnpack4(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack8(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack16(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack24(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack32(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack40(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack48(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack56(int64_t *data, uint64_t offset, uint64_t len);
-  void unrolledUnpack64(int64_t *data, uint64_t offset, uint64_t len);
-
-  uint64_t nextShortRepeats(int64_t* data, uint64_t offset, uint64_t numValues,
-                            const char* notNull);
-  uint64_t nextDirect(int64_t* data, uint64_t offset, uint64_t numValues,
-                      const char* notNull);
-  uint64_t nextPatched(int64_t* data, uint64_t offset, uint64_t numValues,
-                       const char* notNull);
-  uint64_t nextDelta(int64_t* data, uint64_t offset, uint64_t numValues,
-                     const char* notNull);
-
-  uint64_t copyDataFromBuffer(int64_t* data, uint64_t offset, uint64_t numValues,
-                              const char* notNull);
-
-  const std::unique_ptr<SeekableInputStream> inputStream;
-  const bool isSigned;
-
-  unsigned char firstByte;
-  uint64_t runLength; // Length of the current run
-  uint64_t runRead; // Number of returned values of the current run
-  const char *bufferStart;
-  const char *bufferEnd;
-  uint32_t bitsLeft; // Used by readLongs when bitSize < 8
-  uint32_t curByte; // Used by anything that uses readLongs
-  DataBuffer<int64_t> unpackedPatch; // Used by PATCHED_BASE
-  DataBuffer<int64_t> literals; // Values of the current run
-};
+    char* getBufStart() {
+      return bufferStart;
+    }
+
+    void setBufEnd(const char* end) {
+      bufferEnd = const_cast<char*>(end);
+    }
+
+    char* getBufEnd() {
+      return bufferEnd;
+    }
+
+    uint64_t bufLength() {
+      return bufferEnd - bufferStart;
+    }
+
+    void setBitsLeft(const uint32_t bits) {
+      bitsLeft = bits;
+    }
+
+    void setCurByte(const uint32_t byte) {
+      curByte = byte;
+    }
+
+    uint32_t getBitsLeft() {
+      return bitsLeft;
+    }
+
+    uint32_t getCurByte() {
+      return curByte;
+    }
+
+    /**
+     * Most of the time spent in this function goes to saving the stack, so it is declared inline
+     * to gain performance.
+     */
+    inline void resetBufferStart(uint64_t len, bool resetBuf, uint32_t backupLen);
+
+   private:
+    /**
+     * Decode the next gap and patch from 'unpackedPatch' and update the index on it.
+     * Used by PATCHED_BASE.
+     *
+     * @param patchBitSize  bit size of the patch value
+     * @param patchMask     mask for the patch value
+     * @param resGap        result of gap
+     * @param resPatch      result of patch
+     * @param patchIdx      current index in the 'unpackedPatch' buffer
+     */
+    void adjustGapAndPatch(uint32_t patchBitSize, int64_t patchMask, int64_t* resGap,
+                           int64_t* resPatch, uint64_t* patchIdx);
+
+    void resetReadLongs() {
+      bitsLeft = 0;
+      curByte = 0;
+    }
+
+    void resetRun() {
+      resetReadLongs();
+    }
+
+    int64_t readLongBE(uint64_t bsz);
+    int64_t readVslong();
+    uint64_t readVulong();
+    void readLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs);
+
+    template <typename T>
+    uint64_t nextShortRepeats(T* data, uint64_t offset, uint64_t numValues, const char* notNull);
+    template <typename T>
+    uint64_t nextDirect(T* data, uint64_t offset, uint64_t numValues, const char* notNull);
+    template <typename T>
+    uint64_t nextPatched(T* data, uint64_t offset, uint64_t numValues, const char* notNull);
+    template <typename T>
+    uint64_t nextDelta(T* data, uint64_t offset, uint64_t numValues, const char* notNull);
+    template <typename T>
+    uint64_t copyDataFromBuffer(T* data, uint64_t offset, uint64_t numValues, const char* notNull);
+
+    const std::unique_ptr<SeekableInputStream> inputStream;
+    const bool isSigned;
+    unsigned char firstByte;
+    char* bufferStart;
+    char* bufferEnd;
+    uint64_t runLength;                 // Length of the current run
+    uint64_t runRead;                   // Number of returned values of the current run
+    uint32_t bitsLeft;                  // Used by readLongs when bitSize < 8
+    uint32_t curByte;                   // Used by anything that uses readLongs
+    DataBuffer<int64_t> unpackedPatch;  // Used by PATCHED_BASE
+    DataBuffer<int64_t> literals;       // Values of the current run
+  };
+
+  inline void RleDecoderV2::resetBufferStart(uint64_t len, bool resetBuf, uint32_t backupByteLen) {
+    uint64_t remainingLen = bufLength();
+    int bufferLength = 0;
+    const void* bufferPointer = nullptr;
+
+    if (backupByteLen != 0) {
+      inputStream->BackUp(backupByteLen);
+    }
+
+    if (len >= remainingLen && resetBuf) {
+      if (!inputStream->Next(&bufferPointer, &bufferLength)) {
+        throw ParseError("bad read in RleDecoderV2::resetBufferStart");
+      }
+    }
+
+    if (bufferPointer == nullptr) {
+      bufferStart += len;
+    } else {
+      bufferStart = const_cast<char*>(static_cast<const char*>(bufferPointer));
+      bufferEnd = bufferStart + bufferLength;
+    }
+  }
 }  // namespace orc
 
 #endif  // ORC_RLEV2_HH
diff --git a/contrib/libs/apache/orc/c++/src/Reader.cc b/contrib/libs/apache/orc/c++/src/Reader.cc
index 6a9068f202..2cc88fbb80 100644
--- a/contrib/libs/apache/orc/c++/src/Reader.cc
+++ b/contrib/libs/apache/orc/c++/src/Reader.cc
@@ -16,43 +16,49 @@
  * limitations under the License.
  */
 
+#include "Reader.hh"
 #include "Adaptor.hh"
 #include "BloomFilter.hh"
 #include "Options.hh"
-#include "Reader.hh"
 #include "Statistics.hh"
 #include "StripeStream.hh"
+#include "Utils.hh"
 
 #include "wrap/coded-stream-wrapper.h"
 
 #include <algorithm>
 #include <iostream>
+#include <iterator>
 #include <memory>
+#include <set>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <iterator>
-#include <set>
 
 namespace orc {
   // ORC files writen by these versions of cpp writers have inconsistent bloom filter
   // hashing. Bloom filters of them should not be used.
   static const char* BAD_CPP_BLOOM_FILTER_VERSIONS[] = {
-    "1.6.0", "1.6.1", "1.6.2", "1.6.3", "1.6.4", "1.6.5", "1.6.6", "1.6.7", "1.6.8",
-    "1.6.9", "1.6.10", "1.6.11", "1.7.0"};
+      "1.6.0", "1.6.1", "1.6.2", "1.6.3",  "1.6.4",  "1.6.5", "1.6.6",
+      "1.6.7", "1.6.8", "1.6.9", "1.6.10", "1.6.11", "1.7.0"};
+
+  ReaderMetrics* getDefaultReaderMetrics() {
+    static ReaderMetrics internal;
+    return &internal;
+  }
 
   const RowReaderOptions::IdReadIntentMap EMPTY_IDREADINTENTMAP() {
     return {};
   }
 
-  const WriterVersionImpl &WriterVersionImpl::VERSION_HIVE_8732() {
+  const WriterVersionImpl& WriterVersionImpl::VERSION_HIVE_8732() {
     static const WriterVersionImpl version(WriterVersion_HIVE_8732);
     return version;
   }
 
   uint64_t getCompressionBlockSize(const proto::PostScript& ps) {
-    if (ps.has_compressionblocksize()) {
-      return ps.compressionblocksize();
+    if (ps.has_compression_block_size()) {
+      return ps.compression_block_size();
     } else {
       return 256 * 1024;
     }
@@ -67,31 +73,29 @@ namespace orc {
   }
 
   std::string ColumnSelector::toDotColumnPath() {
-      if (columns.empty()) {
-          return std::string();
-      }
-      std::ostringstream columnStream;
-      std::copy(columns.begin(), columns.end(),
+    if (columns.empty()) {
+      return std::string();
+    }
+    std::ostringstream columnStream;
+    std::copy(columns.begin(), columns.end(),
               std::ostream_iterator<std::string>(columnStream, "."));
-      std::string columnPath = columnStream.str();
-      return columnPath.substr(0, columnPath.length() - 1);
+    std::string columnPath = columnStream.str();
+    return columnPath.substr(0, columnPath.length() - 1);
   }
 
-  WriterVersion getWriterVersionImpl(const FileContents * contents) {
-    if (!contents->postscript->has_writerversion()) {
+  WriterVersion getWriterVersionImpl(const FileContents* contents) {
+    if (!contents->postscript->has_writer_version()) {
       return WriterVersion_ORIGINAL;
     }
-    return static_cast<WriterVersion>(contents->postscript->writerversion());
+    return static_cast<WriterVersion>(contents->postscript->writer_version());
   }
 
   void ColumnSelector::selectChildren(std::vector<bool>& selectedColumns, const Type& type) {
     return selectChildren(selectedColumns, type, EMPTY_IDREADINTENTMAP());
   }
 
-  void ColumnSelector::selectChildren(
-      std::vector<bool> &selectedColumns,
-      const Type &type,
-      const RowReaderOptions::IdReadIntentMap& idReadIntentMap) {
+  void ColumnSelector::selectChildren(std::vector<bool>& selectedColumns, const Type& type,
+                                      const RowReaderOptions::IdReadIntentMap& idReadIntentMap) {
     size_t id = static_cast<size_t>(type.getColumnId());
     TypeKind kind = type.getKind();
     if (!selectedColumns[id]) {
@@ -99,8 +103,7 @@ namespace orc {
       bool selectChild = true;
       if (kind == TypeKind::LIST || kind == TypeKind::MAP || kind == TypeKind::UNION) {
         auto elem = idReadIntentMap.find(id);
-        if (elem != idReadIntentMap.end() &&
-            elem->second == ReadIntent_OFFSETS) {
+        if (elem != idReadIntentMap.end() && elem->second == ReadIntent_OFFSETS) {
           selectChild = false;
         }
       }
@@ -121,7 +124,7 @@ namespace orc {
     size_t id = static_cast<size_t>(type.getColumnId());
     bool result = selectedColumns[id];
     uint64_t numSubtypeSelected = 0;
-    for(uint64_t c=0; c < type.getSubtypeCount(); ++c) {
+    for (uint64_t c = 0; c < type.getSubtypeCount(); ++c) {
       if (selectParents(selectedColumns, *type.getSubtype(c))) {
         result = true;
         numSubtypeSelected++;
@@ -169,20 +172,19 @@ namespace orc {
                                       const RowReaderOptions& options) {
     selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false);
     if (contents->schema->getKind() == STRUCT && options.getIndexesSet()) {
-      for(std::list<uint64_t>::const_iterator field = options.getInclude().begin();
-          field != options.getInclude().end(); ++field) {
+      for (std::list<uint64_t>::const_iterator field = options.getInclude().begin();
+           field != options.getInclude().end(); ++field) {
         updateSelectedByFieldId(selectedColumns, *field);
       }
     } else if (contents->schema->getKind() == STRUCT && options.getNamesSet()) {
-      for(std::list<std::string>::const_iterator field = options.getIncludeNames().begin();
-          field != options.getIncludeNames().end(); ++field) {
+      for (std::list<std::string>::const_iterator field = options.getIncludeNames().begin();
+           field != options.getIncludeNames().end(); ++field) {
         updateSelectedByName(selectedColumns, *field);
       }
     } else if (options.getTypeIdsSet()) {
-      const RowReaderOptions::IdReadIntentMap idReadIntentMap =
-          options.getIdReadIntentMap();
-      for(std::list<uint64_t>::const_iterator typeId = options.getInclude().begin();
-          typeId != options.getInclude().end(); ++typeId) {
+      const RowReaderOptions::IdReadIntentMap idReadIntentMap = options.getIdReadIntentMap();
+      for (std::list<uint64_t>::const_iterator typeId = options.getInclude().begin();
+           typeId != options.getInclude().end(); ++typeId) {
         updateSelectedByTypeId(selectedColumns, *typeId, idReadIntentMap);
       }
     } else {
@@ -190,7 +192,7 @@ namespace orc {
       std::fill(selectedColumns.begin(), selectedColumns.end(), true);
     }
     selectParents(selectedColumns, *contents->schema.get());
-    selectedColumns[0] = true; // column 0 is selected by default
+    selectedColumns[0] = true;  // column 0 is selected by default
   }
 
   void ColumnSelector::updateSelectedByFieldId(std::vector<bool>& selectedColumns,
@@ -210,16 +212,14 @@ namespace orc {
   }
 
   void ColumnSelector::updateSelectedByTypeId(
-      std::vector<bool> &selectedColumns,
-      uint64_t typeId,
+      std::vector<bool>& selectedColumns, uint64_t typeId,
       const RowReaderOptions::IdReadIntentMap& idReadIntentMap) {
     if (typeId < selectedColumns.size()) {
       const Type& type = *idTypeMap[typeId];
       selectChildren(selectedColumns, type, idReadIntentMap);
     } else {
       std::stringstream buffer;
-      buffer << "Invalid type id selected " << typeId << " out of "
-             << selectedColumns.size();
+      buffer << "Invalid type id selected " << typeId << " out of " << selectedColumns.size();
       throw ParseError(buffer.str());
     }
   }
@@ -242,36 +242,39 @@ namespace orc {
     }
   }
 
-  ColumnSelector::ColumnSelector(const FileContents* _contents): contents(_contents) {
+  ColumnSelector::ColumnSelector(const FileContents* _contents) : contents(_contents) {
     buildTypeNameIdMap(contents->schema.get());
   }
 
   RowReaderImpl::RowReaderImpl(std::shared_ptr<FileContents> _contents,
-                               const RowReaderOptions& opts
-                         ): localTimezone(getLocalTimezone()),
-                            contents(_contents),
-                            throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()),
-                            forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()),
-                            footer(contents->footer.get()),
-                            firstRowOfStripe(*contents->pool, 0),
-                            enableEncodedBlock(opts.getEnableLazyDecoding()),
-                            readerTimezone(getTimezoneByName(opts.getTimezoneName())) {
+                               const RowReaderOptions& opts)
+      : localTimezone(getLocalTimezone()),
+        contents(_contents),
+        throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()),
+        forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()),
+        footer(contents->footer.get()),
+        firstRowOfStripe(*contents->pool, 0),
+        enableEncodedBlock(opts.getEnableLazyDecoding()),
+        readerTimezone(getTimezoneByName(opts.getTimezoneName())),
+        schemaEvolution(opts.getReadType(), contents->schema.get()) {
     uint64_t numberOfStripes;
     numberOfStripes = static_cast<uint64_t>(footer->stripes_size());
     currentStripe = numberOfStripes;
     lastStripe = 0;
     currentRowInStripe = 0;
     rowsInCurrentStripe = 0;
+    numRowGroupsInStripeRange = 0;
+    useTightNumericVector = opts.getUseTightNumericVector();
+    throwOnSchemaEvolutionOverflow = opts.getThrowOnSchemaEvolutionOverflow();
     uint64_t rowTotal = 0;
 
     firstRowOfStripe.resize(numberOfStripes);
-    for(size_t i=0; i < numberOfStripes; ++i) {
+    for (size_t i = 0; i < numberOfStripes; ++i) {
       firstRowOfStripe[i] = rowTotal;
-      proto::StripeInformation stripeInfo =
-        footer->stripes(static_cast<int>(i));
-      rowTotal += stripeInfo.numberofrows();
+      proto::StripeInformation stripeInfo = footer->stripes(static_cast<int>(i));
+      rowTotal += stripeInfo.number_of_rows();
       bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() &&
-        stripeInfo.offset() < opts.getOffset() + opts.getLength();
+                             stripeInfo.offset() < opts.getOffset() + opts.getLength();
       if (isStripeInRange) {
         if (i < currentStripe) {
           currentStripe = i;
@@ -279,28 +282,33 @@ namespace orc {
         if (i >= lastStripe) {
           lastStripe = i + 1;
         }
+        if (footer->row_index_stride() > 0) {
+          numRowGroupsInStripeRange +=
+              (stripeInfo.number_of_rows() + footer->row_index_stride() - 1) /
+              footer->row_index_stride();
+        }
       }
     }
     firstStripe = currentStripe;
+    processingStripe = lastStripe;
 
     if (currentStripe == 0) {
       previousRow = (std::numeric_limits<uint64_t>::max)();
     } else if (currentStripe == numberOfStripes) {
-      previousRow = footer->numberofrows();
+      previousRow = footer->number_of_rows();
     } else {
-      previousRow = firstRowOfStripe[firstStripe]-1;
+      previousRow = firstRowOfStripe[firstStripe] - 1;
     }
 
     ColumnSelector column_selector(contents.get());
     column_selector.updateSelected(selectedColumns, opts);
 
     // prepare SargsApplier if SearchArgument is available
-    if (opts.getSearchArgument() && footer->rowindexstride() > 0) {
+    if (opts.getSearchArgument() && footer->row_index_stride() > 0) {
       sargs = opts.getSearchArgument();
-      sargsApplier.reset(new SargsApplier(*contents->schema,
-                                          sargs.get(),
-                                          footer->rowindexstride(),
-                                          getWriterVersionImpl(_contents.get())));
+      sargsApplier.reset(
+          new SargsApplier(*contents->schema, sargs.get(), footer->row_index_stride(),
+                           getWriterVersionImpl(_contents.get()), contents->readerMetrics));
     }
 
     skipBloomFilters = hasBadBloomFilters();
@@ -314,9 +322,9 @@ namespace orc {
     // 1.6.x releases before 1.6.11 won't have it. On the other side, the C++ writer
     // supports writing bloom filters since 1.6.0. So files written by the C++ writer
     // and with 'softwareVersion' unset would have bad bloom filters.
-    if (!footer->has_softwareversion()) return true;
+    if (!footer->has_software_version()) return true;
 
-    const std::string &fullVersion = footer->softwareversion();
+    const std::string& fullVersion = footer->software_version();
     std::string version;
     // Deal with snapshot versions, e.g. 1.6.12-SNAPSHOT.
     if (fullVersion.find('-') != std::string::npos) {
@@ -324,7 +332,7 @@ namespace orc {
     } else {
       version = fullVersion;
     }
-    for (const char *v : BAD_CPP_BLOOM_FILTER_VERSIONS) {
+    for (const char* v : BAD_CPP_BLOOM_FILTER_VERSIONS) {
       if (version == v) {
         return true;
       }
@@ -346,8 +354,7 @@ namespace orc {
 
   const Type& RowReaderImpl::getSelectedType() const {
     if (selectedSchema.get() == nullptr) {
-      selectedSchema = buildSelectedType(contents->schema.get(),
-                                         selectedColumns);
+      selectedSchema = buildSelectedType(contents->schema.get(), selectedColumns);
     }
     return *(selectedSchema.get());
   }
@@ -369,49 +376,56 @@ namespace orc {
 
     // seeking past lastStripe
     uint64_t num_stripes = static_cast<uint64_t>(footer->stripes_size());
-    if ( (lastStripe == num_stripes
-            && rowNumber >= footer->numberofrows())  ||
-         (lastStripe < num_stripes
-            && rowNumber >= firstRowOfStripe[lastStripe])   ) {
+    if ((lastStripe == num_stripes && rowNumber >= footer->number_of_rows()) ||
+        (lastStripe < num_stripes && rowNumber >= firstRowOfStripe[lastStripe])) {
       currentStripe = num_stripes;
-      previousRow = footer->numberofrows();
+      previousRow = footer->number_of_rows();
       return;
     }
 
     uint64_t seekToStripe = 0;
-    while (seekToStripe+1 < lastStripe &&
-                  firstRowOfStripe[seekToStripe+1] <= rowNumber) {
+    while (seekToStripe + 1 < lastStripe && firstRowOfStripe[seekToStripe + 1] <= rowNumber) {
       seekToStripe++;
     }
 
     // seeking before the first stripe
     if (seekToStripe < firstStripe) {
       currentStripe = num_stripes;
-      previousRow = footer->numberofrows();
+      previousRow = footer->number_of_rows();
       return;
     }
 
-    currentStripe = seekToStripe;
-    currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
     previousRow = rowNumber;
-    startNextStripe();
+    auto rowIndexStride = footer->row_index_stride();
+    if (!isCurrentStripeInited() || currentStripe != seekToStripe || rowIndexStride == 0 ||
+        currentStripeInfo.index_length() == 0) {
+      // current stripe is not initialized or
+      // target stripe is not current stripe or
+      // current stripe doesn't have row indexes
+      currentStripe = seekToStripe;
+      currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+      startNextStripe();
+      if (currentStripe >= lastStripe) {
+        return;
+      }
+    } else {
+      currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+      if (sargsApplier) {
+        // advance to selected row group if predicate pushdown is enabled
+        currentRowInStripe =
+            advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
+                                  footer->row_index_stride(), sargsApplier->getNextSkippedRows());
+      }
+    }
 
     uint64_t rowsToSkip = currentRowInStripe;
-    auto rowIndexStride = footer->rowindexstride();
     // seek to the target row group if row indexes exists
-    if (rowIndexStride > 0 && currentStripeInfo.indexlength() > 0) {
-      // when predicate push down is enabled, above call to startNextStripe()
-      // will move current row to 1st matching row group; here we only need
-      // to deal with the case when PPD is not enabled.
-      if (!sargsApplier) {
-        if (rowIndexes.empty()) {
-          loadStripeIndex();
-        }
-        auto rowGroupId = static_cast<uint32_t>(rowsToSkip / rowIndexStride);
-        if (rowGroupId != 0) {
-          seekToRowGroup(rowGroupId);
-        }
+    if (rowIndexStride > 0 && currentStripeInfo.index_length() > 0) {
+      if (rowIndexes.empty()) {
+        loadStripeIndex();
       }
+      // TODO(ORC-1175): process the failures of loadStripeIndex() call
+      seekToRowGroup(static_cast<uint32_t>(rowsToSkip / rowIndexStride));
       // skip leading rows in the target row group
       rowsToSkip %= rowIndexStride;
     }
@@ -432,19 +446,14 @@ namespace orc {
     for (int i = 0; i < currentStripeFooter.streams_size(); ++i) {
       const proto::Stream& pbStream = currentStripeFooter.streams(i);
       uint64_t colId = pbStream.column();
-      if (selectedColumns[colId] && pbStream.has_kind()
-          && (pbStream.kind() == proto::Stream_Kind_ROW_INDEX ||
-              pbStream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8)) {
-        std::unique_ptr<SeekableInputStream> inStream =
-          createDecompressor(getCompression(),
-                             std::unique_ptr<SeekableInputStream>
-                               (new SeekableFileInputStream
-                                  (contents->stream.get(),
-                                   offset,
-                                   pbStream.length(),
-                                   *contents->pool)),
-                             getCompressionSize(),
-                             *contents->pool);
+      if (selectedColumns[colId] && pbStream.has_kind() &&
+          (pbStream.kind() == proto::Stream_Kind_ROW_INDEX ||
+           pbStream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8)) {
+        std::unique_ptr<SeekableInputStream> inStream = createDecompressor(
+            getCompression(),
+            std::unique_ptr<SeekableInputStream>(new SeekableFileInputStream(
+                contents->stream.get(), offset, pbStream.length(), *contents->pool)),
+            getCompressionSize(), *contents->pool, contents->readerMetrics);
 
         if (pbStream.kind() == proto::Stream_Kind_ROW_INDEX) {
           proto::RowIndex rowIndex;
@@ -452,17 +461,16 @@ namespace orc {
             throw ParseError("Failed to parse the row index");
           }
           rowIndexes[colId] = rowIndex;
-        } else if (!skipBloomFilters) { // Stream_Kind_BLOOM_FILTER_UTF8
+        } else if (!skipBloomFilters) {  // Stream_Kind_BLOOM_FILTER_UTF8
           proto::BloomFilterIndex pbBFIndex;
           if (!pbBFIndex.ParseFromZeroCopyStream(inStream.get())) {
             throw ParseError("Failed to parse bloom filter index");
           }
           BloomFilterIndex bfIndex;
-          for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) {
+          for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) {
             bfIndex.entries.push_back(BloomFilterUTF8Utils::deserialize(
-              pbStream.kind(),
-              currentStripeFooter.columns(static_cast<int>(pbStream.column())),
-              pbBFIndex.bloomfilter(j)));
+                pbStream.kind(), currentStripeFooter.columns(static_cast<int>(pbStream.column())),
+                pbBFIndex.bloom_filter(j)));
           }
           // add bloom filters to result for one column
           bloomFilterIndex[pbStream.column()] = bfIndex;
@@ -478,11 +486,10 @@ namespace orc {
     // store position providers for selected colimns
     std::unordered_map<uint64_t, PositionProvider> positionProviders;
 
-    for (auto rowIndex = rowIndexes.cbegin();
-         rowIndex != rowIndexes.cend(); ++rowIndex) {
+    for (auto rowIndex = rowIndexes.cbegin(); rowIndex != rowIndexes.cend(); ++rowIndex) {
       uint64_t colId = rowIndex->first;
       const proto::RowIndexEntry& entry =
-        rowIndex->second.entry(static_cast<int32_t>(rowGroupEntryId));
+          rowIndex->second.entry(static_cast<int32_t>(rowGroupEntryId));
 
       // copy index positions for a specific column
       positions.push_back({});
@@ -514,22 +521,16 @@ namespace orc {
 
   proto::StripeFooter getStripeFooter(const proto::StripeInformation& info,
                                       const FileContents& contents) {
-    uint64_t stripeFooterStart = info.offset() + info.indexlength() +
-      info.datalength();
-    uint64_t stripeFooterLength = info.footerlength();
-    std::unique_ptr<SeekableInputStream> pbStream =
-      createDecompressor(contents.compression,
-                         std::unique_ptr<SeekableInputStream>
-                         (new SeekableFileInputStream(contents.stream.get(),
-                                                      stripeFooterStart,
-                                                      stripeFooterLength,
-                                                      *contents.pool)),
-                         contents.blockSize,
-                         *contents.pool);
+    uint64_t stripeFooterStart = info.offset() + info.index_length() + info.data_length();
+    uint64_t stripeFooterLength = info.footer_length();
+    std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
+        contents.compression,
+        std::make_unique<SeekableFileInputStream>(contents.stream.get(), stripeFooterStart,
+                                                  stripeFooterLength, *contents.pool),
+        contents.blockSize, *contents.pool, contents.readerMetrics);
     proto::StripeFooter result;
     if (!result.ParseFromZeroCopyStream(pbStream.get())) {
-      throw ParseError(std::string("bad StripeFooter from ") +
-                       pbStream->getName());
+      throw ParseError(std::string("bad StripeFooter from ") + pbStream->getName());
     }
     // Verify StripeFooter in case it's corrupt
     if (result.columns_size() != contents.footer->types_size()) {
@@ -541,31 +542,29 @@ namespace orc {
     return result;
   }
 
-  ReaderImpl::ReaderImpl(std::shared_ptr<FileContents> _contents,
-                         const ReaderOptions& opts,
-                         uint64_t _fileLength,
-                         uint64_t _postscriptLength
-                         ): contents(std::move(_contents)),
-                            options(opts),
-                            fileLength(_fileLength),
-                            postscriptLength(_postscriptLength),
-                            footer(contents->footer.get()) {
+  ReaderImpl::ReaderImpl(std::shared_ptr<FileContents> _contents, const ReaderOptions& opts,
+                         uint64_t _fileLength, uint64_t _postscriptLength)
+      : contents(std::move(_contents)),
+        options(opts),
+        fileLength(_fileLength),
+        postscriptLength(_postscriptLength),
+        footer(contents->footer.get()) {
     isMetadataLoaded = false;
     checkOrcVersion();
     numberOfStripes = static_cast<uint64_t>(footer->stripes_size());
-    contents->schema = REDUNDANT_MOVE(convertType(footer->types(0), *footer));
+    contents->schema = convertType(footer->types(0), *footer);
     contents->blockSize = getCompressionBlockSize(*contents->postscript);
-    contents->compression= convertCompressionKind(*contents->postscript);
+    contents->compression = convertCompressionKind(*contents->postscript);
   }
 
   std::string ReaderImpl::getSerializedFileTail() const {
     proto::FileTail tail;
-    proto::PostScript *mutable_ps = tail.mutable_postscript();
+    proto::PostScript* mutable_ps = tail.mutable_postscript();
     mutable_ps->CopyFrom(*contents->postscript);
-    proto::Footer *mutableFooter = tail.mutable_footer();
+    proto::Footer* mutableFooter = tail.mutable_footer();
     mutableFooter->CopyFrom(*footer);
-    tail.set_filelength(fileLength);
-    tail.set_postscriptlength(postscriptLength);
+    tail.set_file_length(fileLength);
+    tail.set_postscript_length(postscriptLength);
     TString result;
     if (!tail.SerializeToString(&result)) {
       throw ParseError("Failed to serialize file tail");
@@ -593,29 +592,21 @@ namespace orc {
     if (!isMetadataLoaded) {
       readMetadata();
     }
-    return contents->metadata == nullptr ? 0 :
-      static_cast<uint64_t>(contents->metadata->stripestats_size());
+    return contents->metadata == nullptr
+               ? 0
+               : static_cast<uint64_t>(contents->metadata->stripe_stats_size());
   }
 
-  std::unique_ptr<StripeInformation>
-  ReaderImpl::getStripe(uint64_t stripeIndex) const {
+  std::unique_ptr<StripeInformation> ReaderImpl::getStripe(uint64_t stripeIndex) const {
     if (stripeIndex > getNumberOfStripes()) {
       throw std::logic_error("stripe index out of range");
     }
-    proto::StripeInformation stripeInfo =
-      footer->stripes(static_cast<int>(stripeIndex));
+    proto::StripeInformation stripeInfo = footer->stripes(static_cast<int>(stripeIndex));
 
-    return std::unique_ptr<StripeInformation>
-      (new StripeInformationImpl
-       (stripeInfo.offset(),
-        stripeInfo.indexlength(),
-        stripeInfo.datalength(),
-        stripeInfo.footerlength(),
-        stripeInfo.numberofrows(),
-        contents->stream.get(),
-        *contents->pool,
-        contents->compression,
-        contents->blockSize));
+    return std::unique_ptr<StripeInformation>(new StripeInformationImpl(
+        stripeInfo.offset(), stripeInfo.index_length(), stripeInfo.data_length(),
+        stripeInfo.footer_length(), stripeInfo.number_of_rows(), contents->stream.get(),
+        *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics));
   }
 
   FileVersion ReaderImpl::getFormatVersion() const {
@@ -626,16 +617,16 @@ namespace orc {
   }
 
   uint64_t ReaderImpl::getNumberOfRows() const {
-    return footer->numberofrows();
+    return footer->number_of_rows();
   }
 
   WriterId ReaderImpl::getWriterId() const {
     if (footer->has_writer()) {
       uint32_t id = footer->writer();
-      if (id > WriterId::TRINO_WRITER) {
+      if (id > WriterId::CUDF_WRITER) {
         return WriterId::UNKNOWN_WRITER;
       } else {
-	return static_cast<WriterId>(id);
+        return static_cast<WriterId>(id);
       }
     }
     return WriterId::ORC_JAVA_WRITER;
@@ -652,8 +643,8 @@ namespace orc {
   std::string ReaderImpl::getSoftwareVersion() const {
     std::ostringstream buffer;
     buffer << writerIdToString(getWriterIdValue());
-    if (footer->has_softwareversion()) {
-      buffer << " " << footer->softwareversion();
+    if (footer->has_software_version()) {
+      buffer << " " << footer->software_version();
     }
     return buffer.str();
   }
@@ -663,15 +654,15 @@ namespace orc {
   }
 
   uint64_t ReaderImpl::getContentLength() const {
-    return footer->contentlength();
+    return footer->content_length();
   }
 
   uint64_t ReaderImpl::getStripeStatisticsLength() const {
-    return contents->postscript->metadatalength();
+    return contents->postscript->metadata_length();
   }
 
   uint64_t ReaderImpl::getFileFooterLength() const {
-    return contents->postscript->footerlength();
+    return contents->postscript->footer_length();
   }
 
   uint64_t ReaderImpl::getFilePostscriptLength() const {
@@ -683,7 +674,7 @@ namespace orc {
   }
 
   uint64_t ReaderImpl::getRowIndexStride() const {
-    return footer->rowindexstride();
+    return footer->row_index_stride();
   }
 
   const std::string& ReaderImpl::getStreamName() const {
@@ -692,14 +683,14 @@ namespace orc {
 
   std::list<std::string> ReaderImpl::getMetadataKeys() const {
     std::list<std::string> result;
-    for(int i=0; i < footer->metadata_size(); ++i) {
+    for (int i = 0; i < footer->metadata_size(); ++i) {
       result.push_back(footer->metadata(i).name());
     }
     return result;
   }
 
   std::string ReaderImpl::getMetadataValue(const std::string& key) const {
-    for(int i=0; i < footer->metadata_size(); ++i) {
+    for (int i = 0; i < footer->metadata_size(); ++i) {
       if (footer->metadata(i).name() == TString(key)) {
         return footer->metadata(i).value();
       }
@@ -707,12 +698,13 @@ namespace orc {
     throw std::range_error("key not found");
   }
 
-  void ReaderImpl::getRowIndexStatistics(const proto::StripeInformation& stripeInfo,
-      uint64_t stripeIndex, const proto::StripeFooter& currentStripeFooter,
-      std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const {
+  void ReaderImpl::getRowIndexStatistics(
+      const proto::StripeInformation& stripeInfo, uint64_t stripeIndex,
+      const proto::StripeFooter& currentStripeFooter,
+      std::vector<std::vector<proto::ColumnStatistics>>* indexStats) const {
     int num_streams = currentStripeFooter.streams_size();
     uint64_t offset = stripeInfo.offset();
-    uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength();
+    uint64_t indexEnd = stripeInfo.offset() + stripeInfo.index_length();
     for (int i = 0; i < num_streams; i++) {
       const proto::Stream& stream = currentStripeFooter.streams(i);
       StreamKind streamKind = static_cast<StreamKind>(stream.kind());
@@ -722,19 +714,15 @@ namespace orc {
           std::stringstream msg;
           msg << "Malformed RowIndex stream meta in stripe " << stripeIndex
               << ": streamOffset=" << offset << ", streamLength=" << length
-              << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength="
-              << stripeInfo.indexlength();
+              << ", stripeOffset=" << stripeInfo.offset()
+              << ", stripeIndexLength=" << stripeInfo.index_length();
           throw ParseError(msg.str());
         }
         std::unique_ptr<SeekableInputStream> pbStream =
-          createDecompressor(contents->compression,
-                  std::unique_ptr<SeekableInputStream>
-                  (new SeekableFileInputStream(contents->stream.get(),
-                                                offset,
-                                                length,
-                                                *contents->pool)),
-                  contents->blockSize,
-                  *(contents->pool));
+            createDecompressor(contents->compression,
+                               std::unique_ptr<SeekableInputStream>(new SeekableFileInputStream(
+                                   contents->stream.get(), offset, length, *contents->pool)),
+                               contents->blockSize, *(contents->pool), contents->readerMetrics);
 
         proto::RowIndex rowIndex;
         if (!rowIndex.ParseFromZeroCopyStream(pbStream.get())) {
@@ -752,7 +740,7 @@ namespace orc {
   }
 
   bool ReaderImpl::hasMetadataValue(const std::string& key) const {
-    for(int i=0; i < footer->metadata_size(); ++i) {
+    for (int i = 0; i < footer->metadata_size(); ++i) {
       if (footer->metadata(i).name() == TString(key)) {
         return true;
       }
@@ -764,8 +752,7 @@ namespace orc {
     return *(contents->schema.get());
   }
 
-  std::unique_ptr<StripeStatistics>
-  ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const {
+  std::unique_ptr<StripeStatistics> ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const {
     if (!isMetadataLoaded) {
       readMetadata();
     }
@@ -773,48 +760,40 @@ namespace orc {
       throw std::logic_error("No stripe statistics in file");
     }
     size_t num_cols = static_cast<size_t>(
-                          contents->metadata->stripestats(
-                              static_cast<int>(stripeIndex)).colstats_size());
-    std::vector<std::vector<proto::ColumnStatistics> > indexStats(num_cols);
+        contents->metadata->stripe_stats(static_cast<int>(stripeIndex)).col_stats_size());
+    std::vector<std::vector<proto::ColumnStatistics>> indexStats(num_cols);
 
-    proto::StripeInformation currentStripeInfo =
-        footer->stripes(static_cast<int>(stripeIndex));
-    proto::StripeFooter currentStripeFooter =
-        getStripeFooter(currentStripeInfo, *contents.get());
+    proto::StripeInformation currentStripeInfo = footer->stripes(static_cast<int>(stripeIndex));
+    proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get());
 
     getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats);
 
-    const Timezone& writerTZ =
-      currentStripeFooter.has_writertimezone() ?
-        getTimezoneByName(currentStripeFooter.writertimezone()) :
-        getLocalTimezone();
+    const Timezone& writerTZ = currentStripeFooter.has_writer_timezone()
+                                   ? getTimezoneByName(currentStripeFooter.writer_timezone())
+                                   : getLocalTimezone();
     StatContext statContext(hasCorrectStatistics(), &writerTZ);
-    return std::unique_ptr<StripeStatistics>
-           (new StripeStatisticsImpl(contents->metadata->stripestats(static_cast<int>(stripeIndex)),
-                                                   indexStats, statContext));
+    return std::make_unique<StripeStatisticsImpl>(
+        contents->metadata->stripe_stats(static_cast<int>(stripeIndex)), indexStats, statContext);
   }
 
   std::unique_ptr<Statistics> ReaderImpl::getStatistics() const {
     StatContext statContext(hasCorrectStatistics());
-    return std::unique_ptr<Statistics>
-      (new StatisticsImpl(*footer, statContext));
+    return std::make_unique<StatisticsImpl>(*footer, statContext);
   }
 
-  std::unique_ptr<ColumnStatistics>
-  ReaderImpl::getColumnStatistics(uint32_t index) const {
+  std::unique_ptr<ColumnStatistics> ReaderImpl::getColumnStatistics(uint32_t index) const {
     if (index >= static_cast<uint64_t>(footer->statistics_size())) {
       throw std::logic_error("column index out of range");
     }
-    proto::ColumnStatistics col =
-      footer->statistics(static_cast<int32_t>(index));
+    proto::ColumnStatistics col = footer->statistics(static_cast<int32_t>(index));
 
     StatContext statContext(hasCorrectStatistics());
-    return std::unique_ptr<ColumnStatistics> (convertColumnStatistics(col, statContext));
+    return std::unique_ptr<ColumnStatistics>(convertColumnStatistics(col, statContext));
   }
 
   void ReaderImpl::readMetadata() const {
-    uint64_t metadataSize = contents->postscript->metadatalength();
-    uint64_t footerLength = contents->postscript->footerlength();
+    uint64_t metadataSize = contents->postscript->metadata_length();
+    uint64_t footerLength = contents->postscript->footer_length();
     if (fileLength < metadataSize + footerLength + postscriptLength + 1) {
       std::stringstream msg;
       msg << "Invalid Metadata length: fileLength=" << fileLength
@@ -824,15 +803,11 @@ namespace orc {
     }
     uint64_t metadataStart = fileLength - metadataSize - footerLength - postscriptLength - 1;
     if (metadataSize != 0) {
-      std::unique_ptr<SeekableInputStream> pbStream =
-        createDecompressor(contents->compression,
-                           std::unique_ptr<SeekableInputStream>
-                             (new SeekableFileInputStream(contents->stream.get(),
-                                                          metadataStart,
-                                                          metadataSize,
-                                                          *contents->pool)),
-                           contents->blockSize,
-                           *contents->pool);
+      std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
+          contents->compression,
+          std::make_unique<SeekableFileInputStream>(contents->stream.get(), metadataStart,
+                                                    metadataSize, *contents->pool),
+          contents->blockSize, *contents->pool, contents->readerMetrics);
       contents->metadata.reset(new proto::Metadata());
       if (!contents->metadata->ParseFromZeroCopyStream(pbStream.get())) {
         throw ParseError("Failed to parse the metadata");
@@ -848,10 +823,9 @@ namespace orc {
   void ReaderImpl::checkOrcVersion() {
     FileVersion version = getFormatVersion();
     if (version != FileVersion(0, 11) && version != FileVersion(0, 12)) {
-      *(options.getErrorStream())
-        << "Warning: ORC file " << contents->stream->getName()
-        << " was written in an unknown format version "
-        << version.toString() << "\n";
+      *(options.getErrorStream()) << "Warning: ORC file " << contents->stream->getName()
+                                  << " was written in an unknown format version "
+                                  << version.toString() << "\n";
     }
   }
 
@@ -860,13 +834,12 @@ namespace orc {
     return createRowReader(defaultOpts);
   }
 
-  std::unique_ptr<RowReader> ReaderImpl::createRowReader(
-           const RowReaderOptions& opts) const {
+  std::unique_ptr<RowReader> ReaderImpl::createRowReader(const RowReaderOptions& opts) const {
     if (opts.getSearchArgument() && !isMetadataLoaded) {
       // load stripe statistics for PPD
       readMetadata();
     }
-    return std::unique_ptr<RowReader>(new RowReaderImpl(contents, opts));
+    return std::make_unique<RowReaderImpl>(contents, opts);
   }
 
   uint64_t maxStreamsForType(const proto::Type& type) {
@@ -895,8 +868,8 @@ namespace orc {
       case proto::Type_Kind_VARCHAR:
         return 4;
       default:
-          return 0;
-      }
+        return 0;
+    }
   }
 
   uint64_t ReaderImpl::getMemoryUse(int stripeIx) {
@@ -910,8 +883,8 @@ namespace orc {
     selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false);
     ColumnSelector column_selector(contents.get());
     if (contents->schema->getKind() == STRUCT && include.begin() != include.end()) {
-      for(std::list<uint64_t>::const_iterator field = include.begin();
-          field != include.end(); ++field) {
+      for (std::list<uint64_t>::const_iterator field = include.begin(); field != include.end();
+           ++field) {
         column_selector.updateSelectedByFieldId(selectedColumns, *field);
       }
     } else {
@@ -919,7 +892,7 @@ namespace orc {
       std::fill(selectedColumns.begin(), selectedColumns.end(), true);
     }
     column_selector.selectParents(selectedColumns, *contents->schema.get());
-    selectedColumns[0] = true; // column 0 is selected by default
+    selectedColumns[0] = true;  // column 0 is selected by default
     return getMemoryUse(stripeIx, selectedColumns);
   }
 
@@ -928,8 +901,8 @@ namespace orc {
     selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false);
     ColumnSelector column_selector(contents.get());
     if (contents->schema->getKind() == STRUCT && names.begin() != names.end()) {
-      for(std::list<std::string>::const_iterator field = names.begin();
-          field != names.end(); ++field) {
+      for (std::list<std::string>::const_iterator field = names.begin(); field != names.end();
+           ++field) {
         column_selector.updateSelectedByName(selectedColumns, *field);
       }
     } else {
@@ -937,7 +910,7 @@ namespace orc {
       std::fill(selectedColumns.begin(), selectedColumns.end(), true);
     }
     column_selector.selectParents(selectedColumns, *contents->schema.get());
-    selectedColumns[0] = true; // column 0 is selected by default
+    selectedColumns[0] = true;  // column 0 is selected by default
     return getMemoryUse(stripeIx, selectedColumns);
   }
 
@@ -946,8 +919,8 @@ namespace orc {
     selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false);
     ColumnSelector column_selector(contents.get());
     if (include.begin() != include.end()) {
-      for(std::list<uint64_t>::const_iterator field = include.begin();
-          field != include.end(); ++field) {
+      for (std::list<uint64_t>::const_iterator field = include.begin(); field != include.end();
+           ++field) {
         column_selector.updateSelectedByTypeId(selectedColumns, *field);
       }
     } else {
@@ -955,7 +928,7 @@ namespace orc {
       std::fill(selectedColumns.begin(), selectedColumns.end(), true);
     }
     column_selector.selectParents(selectedColumns, *contents->schema.get());
-    selectedColumns[0] = true; // column 0 is selected by default
+    selectedColumns[0] = true;  // column 0 is selected by default
     return getMemoryUse(stripeIx, selectedColumns);
   }
 
@@ -963,13 +936,13 @@ namespace orc {
     uint64_t maxDataLength = 0;
 
     if (stripeIx >= 0 && stripeIx < footer->stripes_size()) {
-      uint64_t stripe = footer->stripes(stripeIx).datalength();
+      uint64_t stripe = footer->stripes(stripeIx).data_length();
       if (maxDataLength < stripe) {
         maxDataLength = stripe;
       }
     } else {
-      for (int i=0; i < footer->stripes_size(); i++) {
-        uint64_t stripe = footer->stripes(i).datalength();
+      for (int i = 0; i < footer->stripes_size(); i++) {
+        uint64_t stripe = footer->stripes(i).data_length();
         if (maxDataLength < stripe) {
           maxDataLength = stripe;
         }
@@ -978,10 +951,10 @@ namespace orc {
 
     bool hasStringColumn = false;
     uint64_t nSelectedStreams = 0;
-    for (int i=0; !hasStringColumn && i < footer->types_size(); i++) {
+    for (int i = 0; !hasStringColumn && i < footer->types_size(); i++) {
       if (selectedColumns[static_cast<size_t>(i)]) {
         const proto::Type& type = footer->types(i);
-        nSelectedStreams += maxStreamsForType(type) ;
+        nSelectedStreams += maxStreamsForType(type);
         switch (static_cast<int64_t>(type.kind())) {
           case proto::Type_Kind_CHAR:
           case proto::Type_Kind_STRING:
@@ -997,22 +970,23 @@ namespace orc {
       }
     }
 
-    /* If a string column is read, use stripe datalength as a memory estimate
+    /* If a string column is read, use stripe data_length as a memory estimate
      * because we don't know the dictionary size. Multiply by 2 because
      * a string column requires two buffers:
      * in the input stream and in the seekable input stream.
      * If no string column is read, estimate from the number of streams.
      */
-    uint64_t memory = hasStringColumn ? 2 * maxDataLength :
-        std::min(uint64_t(maxDataLength),
-                 nSelectedStreams * contents->stream->getNaturalReadSize());
+    uint64_t memory = hasStringColumn
+                          ? 2 * maxDataLength
+                          : std::min(uint64_t(maxDataLength),
+                                     nSelectedStreams * contents->stream->getNaturalReadSize());
 
     // Do we need even more memory to read the footer or the metadata?
-    if (memory < contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS) {
-      memory =  contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS;
+    if (memory < contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS) {
+      memory = contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS;
     }
-    if (memory < contents->postscript->metadatalength()) {
-      memory =  contents->postscript->metadatalength();
+    if (memory < contents->postscript->metadata_length()) {
+      memory = contents->postscript->metadata_length();
     }
 
     // Account for firstRowOfStripe.
@@ -1021,7 +995,7 @@ namespace orc {
     // Decompressors need buffers for each stream
     uint64_t decompressorMemory = 0;
     if (contents->compression != CompressionKind_NONE) {
-      for (int i=0; i < footer->types_size(); i++) {
+      for (int i = 0; i < footer->types_size(); i++) {
         if (selectedColumns[static_cast<size_t>(i)]) {
           const proto::Type& type = footer->types(i);
           decompressorMemory += maxStreamsForType(type) * contents->blockSize;
@@ -1032,7 +1006,7 @@ namespace orc {
       }
     }
 
-    return memory + decompressorMemory ;
+    return memory + decompressorMemory;
   }
 
   // Update fields to indicate we've reached the end of file
@@ -1045,17 +1019,17 @@ namespace orc {
       previousRow = 0;
     } else {
       previousRow = firstRowOfStripe[lastStripe - 1] +
-          footer->stripes(static_cast<int>(lastStripe - 1)).numberofrows();
+                    footer->stripes(static_cast<int>(lastStripe - 1)).number_of_rows();
     }
   }
 
   void RowReaderImpl::startNextStripe() {
-    reader.reset(); // ColumnReaders use lots of memory; free old memory first
+    reader.reset();  // ColumnReaders use lots of memory; free old memory first
     rowIndexes.clear();
     bloomFilterIndex.clear();
 
     // evaluate file statistics if it exists
-    if (sargsApplier && !sargsApplier->evaluateFileStatistics(*footer)) {
+    if (sargsApplier && !sargsApplier->evaluateFileStatistics(*footer, numRowGroupsInStripeRange)) {
       // skip the entire file
       markEndOfFile();
       return;
@@ -1064,25 +1038,32 @@ namespace orc {
     do {
       currentStripeInfo = footer->stripes(static_cast<int>(currentStripe));
       uint64_t fileLength = contents->stream->getLength();
-      if (currentStripeInfo.offset() + currentStripeInfo.indexlength() +
-        currentStripeInfo.datalength() + currentStripeInfo.footerlength() >= fileLength) {
+      if (currentStripeInfo.offset() + currentStripeInfo.index_length() +
+              currentStripeInfo.data_length() + currentStripeInfo.footer_length() >=
+          fileLength) {
         std::stringstream msg;
-        msg << "Malformed StripeInformation at stripe index " << currentStripe << ": fileLength="
-            << fileLength << ", StripeInfo=(offset=" << currentStripeInfo.offset() << ", indexLength="
-            << currentStripeInfo.indexlength() << ", dataLength=" << currentStripeInfo.datalength()
-            << ", footerLength=" << currentStripeInfo.footerlength() << ")";
+        msg << "Malformed StripeInformation at stripe index " << currentStripe
+            << ": fileLength=" << fileLength
+            << ", StripeInfo=(offset=" << currentStripeInfo.offset()
+            << ", indexLength=" << currentStripeInfo.index_length()
+            << ", dataLength=" << currentStripeInfo.data_length()
+            << ", footerLength=" << currentStripeInfo.footer_length() << ")";
         throw ParseError(msg.str());
       }
       currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get());
-      rowsInCurrentStripe = currentStripeInfo.numberofrows();
+      rowsInCurrentStripe = currentStripeInfo.number_of_rows();
+      processingStripe = currentStripe;
 
       if (sargsApplier) {
         bool isStripeNeeded = true;
         if (contents->metadata) {
           const auto& currentStripeStats =
-            contents->metadata->stripestats(static_cast<int>(currentStripe));
+              contents->metadata->stripe_stats(static_cast<int>(currentStripe));
           // skip this stripe after stats fail to satisfy sargs
-          isStripeNeeded = sargsApplier->evaluateStripeStatistics(currentStripeStats);
+          uint64_t stripeRowGroupCount =
+              (rowsInCurrentStripe + footer->row_index_stride() - 1) / footer->row_index_stride();
+          isStripeNeeded =
+              sargsApplier->evaluateStripeStatistics(currentStripeStats, stripeRowGroupCount);
         }
 
         if (isStripeNeeded) {
@@ -1090,9 +1071,7 @@ namespace orc {
           loadStripeIndex();
 
           // select row groups to read in the current stripe
-          sargsApplier->pickRowGroups(rowsInCurrentStripe,
-                                      rowIndexes,
-                                      bloomFilterIndex);
+          sargsApplier->pickRowGroups(rowsInCurrentStripe, rowIndexes, bloomFilterIndex);
           if (sargsApplier->hasSelectedFrom(currentRowInStripe)) {
             // current stripe has at least one row group matching the predicate
             break;
@@ -1110,26 +1089,23 @@ namespace orc {
     if (currentStripe < lastStripe) {
       // get writer timezone info from stripe footer to help understand timestamp values.
       const Timezone& writerTimezone =
-        currentStripeFooter.has_writertimezone() ?
-          getTimezoneByName(currentStripeFooter.writertimezone()) :
-          localTimezone;
-      StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo,
-                                      currentStripeFooter,
-                                      currentStripeInfo.offset(),
-                                      *contents->stream,
-                                      writerTimezone,
+          currentStripeFooter.has_writer_timezone()
+              ? getTimezoneByName(currentStripeFooter.writer_timezone())
+              : localTimezone;
+      StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, currentStripeFooter,
+                                      currentStripeInfo.offset(), *contents->stream, writerTimezone,
                                       readerTimezone);
-      reader = buildReader(*contents->schema, stripeStreams);
+      reader = buildReader(*contents->schema, stripeStreams, useTightNumericVector,
+                           throwOnSchemaEvolutionOverflow, /*convertToReadType=*/true);
 
       if (sargsApplier) {
         // move to the 1st selected row group when PPD is enabled.
-        currentRowInStripe = advanceToNextRowGroup(currentRowInStripe,
-                                                   rowsInCurrentStripe,
-                                                   footer->rowindexstride(),
-                                                   sargsApplier->getNextSkippedRows());
+        currentRowInStripe =
+            advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
+                                  footer->row_index_stride(), sargsApplier->getNextSkippedRows());
         previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe - 1;
         if (currentRowInStripe > 0) {
-          seekToRowGroup(static_cast<uint32_t>(currentRowInStripe / footer->rowindexstride()));
+          seekToRowGroup(static_cast<uint32_t>(currentRowInStripe / footer->row_index_stride()));
         }
       }
     } else {
@@ -1139,6 +1115,7 @@ namespace orc {
   }
 
   bool RowReaderImpl::next(ColumnVectorBatch& data) {
+    SCOPED_STOPWATCH(contents->readerMetrics, ReaderInclusiveLatencyUs, ReaderCall);
     if (currentStripe >= lastStripe) {
       data.numElements = 0;
       markEndOfFile();
@@ -1148,14 +1125,10 @@ namespace orc {
       startNextStripe();
     }
     uint64_t rowsToRead =
-      std::min(static_cast<uint64_t>(data.capacity),
-               rowsInCurrentStripe - currentRowInStripe);
+        std::min(static_cast<uint64_t>(data.capacity), rowsInCurrentStripe - currentRowInStripe);
     if (sargsApplier && rowsToRead > 0) {
-      rowsToRead = computeBatchSize(rowsToRead,
-                                    currentRowInStripe,
-                                    rowsInCurrentStripe,
-                                    footer->rowindexstride(),
-                                    sargsApplier->getNextSkippedRows());
+      rowsToRead = computeBatchSize(rowsToRead, currentRowInStripe, rowsInCurrentStripe,
+                                    footer->row_index_stride(), sargsApplier->getNextSkippedRows());
     }
     data.numElements = rowsToRead;
     if (rowsToRead == 0) {
@@ -1164,8 +1137,7 @@ namespace orc {
     }
     if (enableEncodedBlock) {
       reader->nextEncoded(data, rowsToRead, nullptr);
-    }
-    else {
+    } else {
       reader->next(data, rowsToRead, nullptr);
     }
     // update row number
@@ -1174,15 +1146,14 @@ namespace orc {
 
     // check if we need to advance to next selected row group
     if (sargsApplier) {
-      uint64_t nextRowToRead = advanceToNextRowGroup(currentRowInStripe,
-                                                     rowsInCurrentStripe,
-                                                     footer->rowindexstride(),
-                                                     sargsApplier->getNextSkippedRows());
+      uint64_t nextRowToRead =
+          advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->row_index_stride(),
+                                sargsApplier->getNextSkippedRows());
       if (currentRowInStripe != nextRowToRead) {
         // it is guaranteed to be at start of a row group
         currentRowInStripe = nextRowToRead;
         if (currentRowInStripe < rowsInCurrentStripe) {
-          seekToRowGroup(static_cast<uint32_t>(currentRowInStripe / footer->rowindexstride()));
+          seekToRowGroup(static_cast<uint32_t>(currentRowInStripe / footer->row_index_stride()));
         }
       }
     }
@@ -1194,10 +1165,8 @@ namespace orc {
     return rowsToRead != 0;
   }
 
-  uint64_t RowReaderImpl::computeBatchSize(uint64_t requestedSize,
-                                           uint64_t currentRowInStripe,
-                                           uint64_t rowsInCurrentStripe,
-                                           uint64_t rowIndexStride,
+  uint64_t RowReaderImpl::computeBatchSize(uint64_t requestedSize, uint64_t currentRowInStripe,
+                                           uint64_t rowsInCurrentStripe, uint64_t rowIndexStride,
                                            const std::vector<uint64_t>& nextSkippedRows) {
     // In case of PPD, batch size should be aware of row group boundaries. If only a subset of row
     // groups are selected then marker position is set to the end of range (subset of row groups
@@ -1240,18 +1209,39 @@ namespace orc {
     return rowsInCurrentStripe;
   }
 
-  std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch
-                                              (uint64_t capacity) const {
-    return getSelectedType().createRowBatch(capacity, *contents->pool, enableEncodedBlock);
+  static void getColumnIds(const Type* type, std::set<uint64_t>& columnIds) {
+    columnIds.insert(type->getColumnId());
+    for (uint64_t i = 0; i < type->getSubtypeCount(); ++i) {
+      getColumnIds(type->getSubtype(i), columnIds);
+    }
   }
 
-  void ensureOrcFooter(InputStream* stream,
-                       DataBuffer<char> *buffer,
-                       uint64_t postscriptLength) {
+  std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch(uint64_t capacity) const {
+    // If the read type is specified, then check that the selected schema matches the read type
+    // on the first call to createRowBatch.
+    if (schemaEvolution.getReadType() && selectedSchema.get() == nullptr) {
+      auto fileSchema = &getSelectedType();
+      auto readType = schemaEvolution.getReadType();
+      std::set<uint64_t> readColumns, fileColumns;
+      getColumnIds(readType, readColumns);
+      getColumnIds(fileSchema, fileColumns);
+      if (readColumns != fileColumns) {
+        std::ostringstream ss;
+        ss << "The selected schema " << fileSchema->toString() << " doesn't match read type "
+           << readType->toString();
+        throw SchemaEvolutionError(ss.str());
+      }
+    }
+    const Type& readType =
+        schemaEvolution.getReadType() ? *schemaEvolution.getReadType() : getSelectedType();
+    return readType.createRowBatch(capacity, *contents->pool, enableEncodedBlock,
+                                   useTightNumericVector);
+  }
 
+  void ensureOrcFooter(InputStream* stream, DataBuffer<char>* buffer, uint64_t postscriptLength) {
     const std::string MAGIC("ORC");
     const uint64_t magicLength = MAGIC.length();
-    const char * const bufferStart = buffer->data();
+    const char* const bufferStart = buffer->data();
     const uint64_t bufferLength = buffer->size();
 
     if (postscriptLength < magicLength || bufferLength < magicLength) {
@@ -1263,7 +1253,7 @@ namespace orc {
     if (memcmp(magicStart, MAGIC.c_str(), magicLength) != 0) {
       // If there is no magic string at the end, check the beginning.
       // Only files written by Hive 0.11.0 don't have the tail ORC string.
-      std::unique_ptr<char[]> frontBuffer( new char[magicLength] );
+      std::unique_ptr<char[]> frontBuffer(new char[magicLength]);
       stream->read(frontBuffer.get(), magicLength, 0);
       bool foundMatch = memcmp(frontBuffer.get(), MAGIC.c_str(), magicLength) == 0;
 
@@ -1279,28 +1269,25 @@ namespace orc {
    * @param buffer the buffer with the tail of the file.
    * @param postscriptSize the length of postscript in bytes
    */
-  std::unique_ptr<proto::PostScript> readPostscript(InputStream *stream,
-                                                    DataBuffer<char> *buffer,
+  std::unique_ptr<proto::PostScript> readPostscript(InputStream* stream, DataBuffer<char>* buffer,
                                                     uint64_t postscriptSize) {
-    char *ptr = buffer->data();
+    char* ptr = buffer->data();
     uint64_t readSize = buffer->size();
 
     ensureOrcFooter(stream, buffer, postscriptSize);
 
-    std::unique_ptr<proto::PostScript> postscript =
-      std::unique_ptr<proto::PostScript>(new proto::PostScript());
+    auto postscript = std::make_unique<proto::PostScript>();
     if (readSize < 1 + postscriptSize) {
       std::stringstream msg;
-      msg << "Invalid ORC postscript length: " << postscriptSize << ", file length = "
-          << stream->getLength();
+      msg << "Invalid ORC postscript length: " << postscriptSize
+          << ", file length = " << stream->getLength();
       throw ParseError(msg.str());
     }
     if (!postscript->ParseFromArray(ptr + readSize - 1 - postscriptSize,
-                                   static_cast<int>(postscriptSize))) {
-      throw ParseError("Failed to parse the postscript from " +
-                       stream->getName());
+                                    static_cast<int>(postscriptSize))) {
+      throw ParseError("Failed to parse the postscript from " + stream->getName());
     }
-    return REDUNDANT_MOVE(postscript);
+    return postscript;
   }
 
   /**
@@ -1308,7 +1295,7 @@ namespace orc {
    * so we won't crash when we convert the proto::Types to TypeImpls (ORC-317).
    * For STRUCT types, fieldName size should match subTypes size (ORC-581).
    */
-  void checkProtoTypes(const proto::Footer &footer) {
+  void checkProtoTypes(const proto::Footer& footer) {
     std::stringstream msg;
     int maxId = footer.types_size();
     if (maxId <= 0) {
@@ -1316,17 +1303,16 @@ namespace orc {
     }
     for (int i = 0; i < maxId; ++i) {
       const proto::Type& type = footer.types(i);
-      if (type.kind() == proto::Type_Kind_STRUCT
-         && type.subtypes_size() != type.fieldnames_size()) {
+      if (type.kind() == proto::Type_Kind_STRUCT &&
+          type.subtypes_size() != type.field_names_size()) {
         msg << "Footer is corrupt: STRUCT type " << i << " has " << type.subtypes_size()
-            << " subTypes, but has " << type.fieldnames_size() << " fieldNames";
+            << " subTypes, but has " << type.field_names_size() << " fieldNames";
         throw ParseError(msg.str());
       }
       for (int j = 0; j < type.subtypes_size(); ++j) {
         int subTypeId = static_cast<int>(type.subtypes(j));
         if (subTypeId <= i) {
-          msg << "Footer is corrupt: malformed link from type " << i << " to "
-              << subTypeId;
+          msg << "Footer is corrupt: malformed link from type " << i << " to " << subTypeId;
           throw ParseError(msg.str());
         }
         if (subTypeId >= maxId) {
@@ -1334,9 +1320,8 @@ namespace orc {
           throw ParseError(msg.str());
         }
         if (j > 0 && static_cast<int>(type.subtypes(j - 1)) >= subTypeId) {
-          msg << "Footer is corrupt: subType(" << (j-1) << ") >= subType(" << j
-              << ") in types(" << i << "). (" << type.subtypes(j - 1) << " >= "
-              << subTypeId << ")";
+          msg << "Footer is corrupt: subType(" << (j - 1) << ") >= subType(" << j << ") in types("
+              << i << "). (" << type.subtypes(j - 1) << " >= " << subTypeId << ")";
           throw ParseError(msg.str());
         }
       }
@@ -1351,37 +1336,31 @@ namespace orc {
    * @param ps the file's postscript
    * @param memoryPool the memory pool to use
    */
-  std::unique_ptr<proto::Footer> readFooter(InputStream* stream,
-                                            const DataBuffer<char> *buffer,
-                                            uint64_t footerOffset,
-                                            const proto::PostScript& ps,
-                                            MemoryPool& memoryPool) {
-    const char *footerPtr = buffer->data() + footerOffset;
-
-    std::unique_ptr<SeekableInputStream> pbStream =
-      createDecompressor(convertCompressionKind(ps),
-                         std::unique_ptr<SeekableInputStream>
-                         (new SeekableArrayInputStream(footerPtr,
-                                                       ps.footerlength())),
-                         getCompressionBlockSize(ps),
-                         memoryPool);
-
-    std::unique_ptr<proto::Footer> footer =
-      std::unique_ptr<proto::Footer>(new proto::Footer());
+  std::unique_ptr<proto::Footer> readFooter(InputStream* stream, const DataBuffer<char>* buffer,
+                                            uint64_t footerOffset, const proto::PostScript& ps,
+                                            MemoryPool& memoryPool, ReaderMetrics* readerMetrics) {
+    const char* footerPtr = buffer->data() + footerOffset;
+
+    std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
+        convertCompressionKind(ps),
+        std::make_unique<SeekableArrayInputStream>(footerPtr, ps.footer_length()),
+        getCompressionBlockSize(ps), memoryPool, readerMetrics);
+
+    auto footer = std::make_unique<proto::Footer>();
     if (!footer->ParseFromZeroCopyStream(pbStream.get())) {
-      throw ParseError("Failed to parse the footer from " +
-                       stream->getName());
+      throw ParseError("Failed to parse the footer from " + stream->getName());
     }
 
     checkProtoTypes(*footer);
-    return REDUNDANT_MOVE(footer);
+    return footer;
   }
 
   std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream,
                                        const ReaderOptions& options) {
-    std::shared_ptr<FileContents> contents = std::shared_ptr<FileContents>(new FileContents());
+    auto contents = std::make_shared<FileContents>();
     contents->pool = options.getMemoryPool();
     contents->errorStream = options.getErrorStream();
+    contents->readerMetrics = options.getReaderMetrics();
     std::string serializedFooter = options.getSerializedFileTail();
     uint64_t fileLength;
     uint64_t postscriptLength;
@@ -1391,27 +1370,25 @@ namespace orc {
       if (!tail.ParseFromString(TString(serializedFooter))) {
         throw ParseError("Failed to parse the file tail from string");
       }
-      contents->postscript.reset(new proto::PostScript(tail.postscript()));
-      contents->footer.reset(new proto::Footer(tail.footer()));
-      fileLength = tail.filelength();
-      postscriptLength = tail.postscriptlength();
+      contents->postscript = std::make_unique<proto::PostScript>(tail.postscript());
+      contents->footer = std::make_unique<proto::Footer>(tail.footer());
+      fileLength = tail.file_length();
+      postscriptLength = tail.postscript_length();
     } else {
       // figure out the size of the file using the option or filesystem
-      fileLength = std::min(options.getTailLocation(),
-                            static_cast<uint64_t>(stream->getLength()));
+      fileLength = std::min(options.getTailLocation(), static_cast<uint64_t>(stream->getLength()));
 
-      //read last bytes into buffer to get PostScript
+      // read last bytes into buffer to get PostScript
       uint64_t readSize = std::min(fileLength, DIRECTORY_SIZE_GUESS);
       if (readSize < 4) {
         throw ParseError("File size too small");
       }
-      std::unique_ptr<DataBuffer<char>> buffer( new DataBuffer<char>(*contents->pool, readSize) );
+      auto buffer = std::make_unique<DataBuffer<char>>(*contents->pool, readSize);
       stream->read(buffer->data(), readSize, fileLength - readSize);
 
       postscriptLength = buffer->data()[readSize - 1] & 0xff;
-      contents->postscript = REDUNDANT_MOVE(readPostscript(stream.get(),
-        buffer.get(), postscriptLength));
-      uint64_t footerSize = contents->postscript->footerlength();
+      contents->postscript = readPostscript(stream.get(), buffer.get(), postscriptLength);
+      uint64_t footerSize = contents->postscript->footer_length();
       uint64_t tailSize = 1 + postscriptLength + footerSize;
       if (tailSize >= fileLength) {
         std::stringstream msg;
@@ -1428,8 +1405,8 @@ namespace orc {
         footerOffset = readSize - tailSize;
       }
 
-      contents->footer = REDUNDANT_MOVE(readFooter(stream.get(), buffer.get(),
-        footerOffset, *contents->postscript,  *contents->pool));
+      contents->footer = readFooter(stream.get(), buffer.get(), footerOffset, *contents->postscript,
+                                    *contents->pool, contents->readerMetrics);
     }
     contents->isDecimalAsLong = false;
     if (contents->postscript->version_size() == 2) {
@@ -1439,27 +1416,23 @@ namespace orc {
       }
     }
     contents->stream = std::move(stream);
-    return std::unique_ptr<Reader>(new ReaderImpl(std::move(contents),
-                                                  options,
-                                                  fileLength,
-                                                  postscriptLength));
+    return std::make_unique<ReaderImpl>(std::move(contents), options, fileLength, postscriptLength);
   }
 
-  std::map<uint32_t, BloomFilterIndex>
-  ReaderImpl::getBloomFilters(uint32_t stripeIndex,
-                              const std::set<uint32_t>& included) const {
+  std::map<uint32_t, BloomFilterIndex> ReaderImpl::getBloomFilters(
+      uint32_t stripeIndex, const std::set<uint32_t>& included) const {
     std::map<uint32_t, BloomFilterIndex> ret;
 
     // find stripe info
     if (stripeIndex >= static_cast<uint32_t>(footer->stripes_size())) {
-      throw std::logic_error("Illegal stripe index: " + to_string(static_cast<int64_t>(stripeIndex)));
+      throw std::logic_error("Illegal stripe index: " +
+                             to_string(static_cast<int64_t>(stripeIndex)));
     }
     const proto::StripeInformation currentStripeInfo =
-      footer->stripes(static_cast<int>(stripeIndex));
-    const proto::StripeFooter currentStripeFooter =
-      getStripeFooter(currentStripeInfo, *contents);
+        footer->stripes(static_cast<int>(stripeIndex));
+    const proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents);
 
-    // iterate stripe footer to get stream of bloomfilter
+    // iterate over the stripe footer to find the bloom filter streams
     uint64_t offset = static_cast<uint64_t>(currentStripeInfo.offset());
     for (int i = 0; i < currentStripeFooter.streams_size(); i++) {
       const proto::Stream& stream = currentStripeFooter.streams(i);
@@ -1469,16 +1442,11 @@ namespace orc {
       // a bloom filter stream from a selected column is found
       if (stream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8 &&
           (included.empty() || included.find(column) != included.end())) {
-
         std::unique_ptr<SeekableInputStream> pbStream =
-          createDecompressor(contents->compression,
-                             std::unique_ptr<SeekableInputStream>
-                               (new SeekableFileInputStream(contents->stream.get(),
-                                                            offset,
-                                                            length,
-                                                            *contents->pool)),
-                             contents->blockSize,
-                             *(contents->pool));
+            createDecompressor(contents->compression,
+                               std::make_unique<SeekableFileInputStream>(
+                                   contents->stream.get(), offset, length, *contents->pool),
+                               contents->blockSize, *(contents->pool), contents->readerMetrics);
 
         proto::BloomFilterIndex pbBFIndex;
         if (!pbBFIndex.ParseFromZeroCopyStream(pbStream.get())) {
@@ -1486,11 +1454,10 @@ namespace orc {
         }
 
         BloomFilterIndex bfIndex;
-        for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) {
-	  std::unique_ptr<BloomFilter> entry = BloomFilterUTF8Utils::deserialize(
-            stream.kind(),
-            currentStripeFooter.columns(static_cast<int>(stream.column())),
-            pbBFIndex.bloomfilter(j));
+        for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) {
+          std::unique_ptr<BloomFilter> entry = BloomFilterUTF8Utils::deserialize(
+              stream.kind(), currentStripeFooter.columns(static_cast<int>(stream.column())),
+              pbBFIndex.bloom_filter(j));
           bfIndex.entries.push_back(std::shared_ptr<BloomFilter>(std::move(entry)));
         }
 
@@ -1512,10 +1479,8 @@ namespace orc {
     // PASS
   }
 
-  InputStream::~InputStream() {
-    // PASS
+  InputStream::~InputStream(){
+      // PASS
   };
 
-
-
-}// namespace
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Reader.hh b/contrib/libs/apache/orc/c++/src/Reader.hh
index ffaff4176e..a1367e4bd3 100644
--- a/contrib/libs/apache/orc/c++/src/Reader.hh
+++ b/contrib/libs/apache/orc/c++/src/Reader.hh
@@ -26,20 +26,22 @@
 
 #include "ColumnReader.hh"
 #include "RLE.hh"
-#include "sargs/SargsApplier.hh"
+#include "SchemaEvolution.hh"
 #include "TypeImpl.hh"
+#include "sargs/SargsApplier.hh"
 
 namespace orc {
 
   static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024;
 
   /**
-  * WriterVersion Implementation
-  */
+   * WriterVersion Implementation
+   */
   class WriterVersionImpl {
-  private:
+   private:
     WriterVersion version;
-  public:
+
+   public:
     // Known Versions with issues resolved
     // The static method below is to fix global constructors Clang warning
     static const WriterVersionImpl& VERSION_HIVE_8732();
@@ -52,8 +54,8 @@ namespace orc {
   };
 
   /**
-  * State shared between Reader and Row Reader
-  */
+   * State shared between Reader and Row Reader
+   */
   struct FileContents {
     std::unique_ptr<InputStream> stream;
     std::unique_ptr<proto::PostScript> postscript;
@@ -61,12 +63,13 @@ namespace orc {
     std::unique_ptr<Type> schema;
     uint64_t blockSize;
     CompressionKind compression;
-    MemoryPool *pool;
-    std::ostream *errorStream;
+    MemoryPool* pool;
+    std::ostream* errorStream;
     /// Decimal64 in ORCv2 uses RLE to store values. This flag indicates whether
     /// this new encoding is used.
     bool isDecimalAsLong;
     std::unique_ptr<proto::Metadata> metadata;
+    ReaderMetrics* readerMetrics;
   };
 
   proto::StripeFooter getStripeFooter(const proto::StripeInformation& info,
@@ -109,10 +112,10 @@ namespace orc {
     // is selected.
     bool selectParents(std::vector<bool>& selectedColumns, const Type& type);
 
-   /**
-    * Constructor that selects columns.
-    * @param contents of the file
-    */
+    /**
+     * Constructor that selects columns.
+     * @param contents of the file
+     */
     ColumnSelector(const FileContents* contents);
 
     // Select the columns from the RowReaderoptions object
@@ -122,9 +125,8 @@ namespace orc {
     void updateSelected(std::vector<bool>& selectedColumns, const ReaderOptions& options);
   };
 
-
   class RowReaderImpl : public RowReader {
-  private:
+   private:
     const Timezone& localTimezone;
 
     // contents
@@ -145,14 +147,19 @@ namespace orc {
     uint64_t previousRow;
     uint64_t firstStripe;
     uint64_t currentStripe;
-    uint64_t lastStripe; // the stripe AFTER the last one
+    uint64_t lastStripe;  // the stripe AFTER the last one
+    uint64_t processingStripe;
     uint64_t currentRowInStripe;
     uint64_t rowsInCurrentStripe;
+    // number of row groups between first stripe and last stripe
+    uint64_t numRowGroupsInStripeRange;
     proto::StripeInformation currentStripeInfo;
     proto::StripeFooter currentStripeFooter;
     std::unique_ptr<ColumnReader> reader;
 
     bool enableEncodedBlock;
+    bool useTightNumericVector;
+    bool throwOnSchemaEvolutionOverflow;
     // internal methods
     void startNextStripe();
     inline void markEndOfFile();
@@ -166,27 +173,32 @@ namespace orc {
     // desired timezone to return data of timestamp types.
     const Timezone& readerTimezone;
 
+    // match read and file types
+    SchemaEvolution schemaEvolution;
+
     // load stripe index if not done so
     void loadStripeIndex();
 
     // In case of PPD, batch size should be aware of row group boundaries.
     // If only a subset of row groups are selected then the next read should
     // stop at the end of selected range.
-    static uint64_t computeBatchSize(uint64_t requestedSize,
-                                     uint64_t currentRowInStripe,
-                                     uint64_t rowsInCurrentStripe,
-                                     uint64_t rowIndexStride,
+    static uint64_t computeBatchSize(uint64_t requestedSize, uint64_t currentRowInStripe,
+                                     uint64_t rowsInCurrentStripe, uint64_t rowIndexStride,
                                      const std::vector<uint64_t>& nextSkippedRows);
 
     // Skip non-selected rows
-    static uint64_t advanceToNextRowGroup(uint64_t currentRowInStripe,
-                                          uint64_t rowsInCurrentStripe,
+    static uint64_t advanceToNextRowGroup(uint64_t currentRowInStripe, uint64_t rowsInCurrentStripe,
                                           uint64_t rowIndexStride,
                                           const std::vector<uint64_t>& nextSkippedRows);
 
     friend class TestRowReader_advanceToNextRowGroup_Test;
     friend class TestRowReader_computeBatchSize_Test;
 
+    // whether the current stripe is initialized
+    inline bool isCurrentStripeInited() const {
+      return currentStripe == processingStripe;
+    }
+
     /**
      * Seek to the start of a row group in the current stripe
      * @param rowGroupEntryId the row group id to seek to
@@ -200,22 +212,20 @@ namespace orc {
      */
     bool hasBadBloomFilters();
 
-  public:
-   /**
-    * Constructor that lets the user specify additional options.
-    * @param contents of the file
-    * @param options options for reading
-    */
-    RowReaderImpl(std::shared_ptr<FileContents> contents,
-                  const RowReaderOptions& options);
+   public:
+    /**
+     * Constructor that lets the user specify additional options.
+     * @param contents of the file
+     * @param options options for reading
+     */
+    RowReaderImpl(std::shared_ptr<FileContents> contents, const RowReaderOptions& options);
 
     // Select the columns from the options object
     const std::vector<bool> getSelectedColumns() const override;
 
     const Type& getSelectedType() const override;
 
-    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size
-                                                      ) const override;
+    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) const override;
 
     bool next(ColumnVectorBatch& data) override;
 
@@ -231,6 +241,10 @@ namespace orc {
     bool getThrowOnHive11DecimalOverflow() const;
     bool getIsDecimalAsLong() const;
     int32_t getForcedScaleOnHive11Decimal() const;
+
+    const SchemaEvolution* getSchemaEvolution() const {
+      return &schemaEvolution;
+    }
   };
 
   class ReaderImpl : public Reader {
@@ -251,12 +265,14 @@ namespace orc {
     // internal methods
     void readMetadata() const;
     void checkOrcVersion();
-    void getRowIndexStatistics(const proto::StripeInformation& stripeInfo, uint64_t stripeIndex,
-                               const proto::StripeFooter& currentStripeFooter,
-                               std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const;
+    void getRowIndexStatistics(
+        const proto::StripeInformation& stripeInfo, uint64_t stripeIndex,
+        const proto::StripeFooter& currentStripeFooter,
+        std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const;
 
     // metadata
     mutable bool isMetadataLoaded;
+
    public:
     /**
      * Constructor that lets the user specify additional options.
@@ -265,10 +281,8 @@ namespace orc {
      * @param fileLength the length of the file in bytes
      * @param postscriptLength the length of the postscript in bytes
      */
-    ReaderImpl(std::shared_ptr<FileContents> contents,
-               const ReaderOptions& options,
-               uint64_t fileLength,
-               uint64_t postscriptLength);
+    ReaderImpl(std::shared_ptr<FileContents> contents, const ReaderOptions& options,
+               uint64_t fileLength, uint64_t postscriptLength);
 
     const ReaderOptions& getReaderOptions() const;
 
@@ -298,20 +312,17 @@ namespace orc {
 
     uint64_t getNumberOfStripes() const override;
 
-    std::unique_ptr<StripeInformation> getStripe(uint64_t
-                                                 ) const override;
+    std::unique_ptr<StripeInformation> getStripe(uint64_t) const override;
 
     uint64_t getNumberOfStripeStatistics() const override;
 
     const std::string& getStreamName() const override;
 
-    std::unique_ptr<StripeStatistics>
-    getStripeStatistics(uint64_t stripeIndex) const override;
+    std::unique_ptr<StripeStatistics> getStripeStatistics(uint64_t stripeIndex) const override;
 
     std::unique_ptr<RowReader> createRowReader() const override;
 
-    std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options
-                                               ) const override;
+    std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options) const override;
 
     uint64_t getContentLength() const override;
     uint64_t getStripeStatisticsLength() const override;
@@ -321,8 +332,7 @@ namespace orc {
 
     std::unique_ptr<Statistics> getStatistics() const override;
 
-    std::unique_ptr<ColumnStatistics> getColumnStatistics(uint32_t columnId
-                                                          ) const override;
+    std::unique_ptr<ColumnStatistics> getColumnStatistics(uint32_t columnId) const override;
 
     std::string getSerializedFileTail() const override;
 
@@ -330,28 +340,41 @@ namespace orc {
 
     bool hasCorrectStatistics() const override;
 
-    const proto::PostScript* getPostscript() const {return contents->postscript.get();}
+    const ReaderMetrics* getReaderMetrics() const override {
+      return contents->readerMetrics;
+    }
 
-    uint64_t getBlockSize() const {return contents->blockSize;}
+    const proto::PostScript* getPostscript() const {
+      return contents->postscript.get();
+    }
 
-    const proto::Footer* getFooter() const {return contents->footer.get();}
+    uint64_t getBlockSize() const {
+      return contents->blockSize;
+    }
 
-    const Type* getSchema() const {return contents->schema.get();}
+    const proto::Footer* getFooter() const {
+      return contents->footer.get();
+    }
 
-    InputStream* getStream() const {return contents->stream.get();}
+    const Type* getSchema() const {
+      return contents->schema.get();
+    }
+
+    InputStream* getStream() const {
+      return contents->stream.get();
+    }
 
     uint64_t getMemoryUse(int stripeIx = -1) override;
 
-    uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) override;
+    uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx = -1) override;
 
-    uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) override;
+    uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx = -1) override;
 
-    uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) override;
+    uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx = -1) override;
 
-    std::map<uint32_t, BloomFilterIndex>
-    getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const override;
+    std::map<uint32_t, BloomFilterIndex> getBloomFilters(
+        uint32_t stripeIndex, const std::set<uint32_t>& included) const override;
   };
-
-}// namespace
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc b/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc
index 8ab57b1f6e..ae05a70a36 100644
--- a/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc
+++ b/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc
@@ -17,731 +17,439 @@
  */
 
 #include "Adaptor.hh"
+#include "BpackingDefault.hh"
+#if defined(ORC_HAVE_RUNTIME_AVX512)
+#error #include "BpackingAvx512.hh"
+#endif
 #include "Compression.hh"
-#include "RLEv2.hh"
+#include "Dispatch.hh"
 #include "RLEV2Util.hh"
+#include "RLEv2.hh"
+#include "Utils.hh"
 
 namespace orc {
 
-unsigned char RleDecoderV2::readByte() {
-  if (bufferStart == bufferEnd) {
-    int bufferLength;
-    const void* bufferPointer;
-    if (!inputStream->Next(&bufferPointer, &bufferLength)) {
-      throw ParseError("bad read in RleDecoderV2::readByte");
+  unsigned char RleDecoderV2::readByte() {
+    SCOPED_MINUS_STOPWATCH(metrics, DecodingLatencyUs);
+    if (bufferStart == bufferEnd) {
+      int bufferLength;
+      const void* bufferPointer;
+      if (!inputStream->Next(&bufferPointer, &bufferLength)) {
+        throw ParseError("bad read in RleDecoderV2::readByte");
+      }
+      bufferStart = const_cast<char*>(static_cast<const char*>(bufferPointer));
+      bufferEnd = bufferStart + bufferLength;
     }
-    bufferStart = static_cast<const char*>(bufferPointer);
-    bufferEnd = bufferStart + bufferLength;
-  }
 
-  unsigned char result = static_cast<unsigned char>(*bufferStart++);
-  return result;
-}
-
-int64_t RleDecoderV2::readLongBE(uint64_t bsz) {
-  int64_t ret = 0, val;
-  uint64_t n = bsz;
-  while (n > 0) {
-    n--;
-    val = readByte();
-    ret |= (val << (n * 8));
-  }
-  return ret;
-}
-
-inline int64_t RleDecoderV2::readVslong() {
-  return unZigZag(readVulong());
-}
-
-uint64_t RleDecoderV2::readVulong() {
-  uint64_t ret = 0, b;
-  uint64_t offset = 0;
-  do {
-    b = readByte();
-    ret |= (0x7f & b) << offset;
-    offset += 7;
-  } while (b >= 0x80);
-  return ret;
-}
-
-void RleDecoderV2::readLongs(int64_t *data, uint64_t offset, uint64_t len, uint64_t fbs) {
-  switch (fbs) {
-    case 4:
-      unrolledUnpack4(data, offset, len);
-      return;
-    case 8:
-      unrolledUnpack8(data, offset, len);
-      return;
-    case 16:
-      unrolledUnpack16(data, offset, len);
-      return;
-    case 24:
-      unrolledUnpack24(data, offset, len);
-      return;
-    case 32:
-      unrolledUnpack32(data, offset, len);
-      return;
-    case 40:
-      unrolledUnpack40(data, offset, len);
-      return;
-    case 48:
-      unrolledUnpack48(data, offset, len);
-      return;
-    case 56:
-      unrolledUnpack56(data, offset, len);
-      return;
-    case 64:
-      unrolledUnpack64(data, offset, len);
-      return;
-    default:
-      // Fallback to the default implementation for deprecated bit size.
-      plainUnpackLongs(data, offset, len, fbs);
-      return;
+    unsigned char result = static_cast<unsigned char>(*bufferStart++);
+    return result;
   }
-}
-
-void RleDecoderV2::unrolledUnpack4(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Make sure bitsLeft is 0 before the loop. bitsLeft can only be 0, 4, or 8.
-    while (bitsLeft > 0 && curIdx < offset + len) {
-      bitsLeft -= 4;
-      data[curIdx++] = (curByte >> bitsLeft) & 15;
-    }
-    if (curIdx == offset + len) return;
-
-    // Exhaust the buffer
-    uint64_t numGroups = (offset + len - curIdx) / 2;
-    numGroups = std::min(numGroups, static_cast<uint64_t>(bufferEnd - bufferStart));
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto *buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    uint32_t localByte;
-    for (uint64_t i = 0; i < numGroups; ++i) {
-      localByte = *buffer++;
-      data[curIdx] = (localByte >> 4) & 15;
-      data[curIdx + 1] = localByte & 15;
-      curIdx += 2;
-    }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
 
-    // readByte() will update 'bufferStart' and 'bufferEnd'
-    curByte = readByte();
-    bitsLeft = 8;
-  }
-}
-
-void RleDecoderV2::unrolledUnpack8(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = bufferEnd - bufferStart;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      data[curIdx++] = *buffer++;
+  int64_t RleDecoderV2::readLongBE(uint64_t bsz) {
+    int64_t ret = 0, val;
+    uint64_t n = bsz;
+    while (n > 0) {
+      n--;
+      val = readByte();
+      ret |= (val << (n * 8));
     }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
+    return ret;
+  }
 
-    // readByte() will update 'bufferStart' and 'bufferEnd'.
-    data[curIdx++] = readByte();
+  inline int64_t RleDecoderV2::readVslong() {
+    return unZigZag(readVulong());
   }
-}
-
-void RleDecoderV2::unrolledUnpack16(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 2;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint16_t b0, b1;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint16_t>(*buffer);
-      b1 = static_cast<uint16_t>(*(buffer + 1));
-      buffer += 2;
-      data[curIdx++] = (b0 << 8) | b1;
-    }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
 
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    data[curIdx++] = (b0 << 8) | b1;
+  uint64_t RleDecoderV2::readVulong() {
+    uint64_t ret = 0, b;
+    uint64_t offset = 0;
+    do {
+      b = readByte();
+      ret |= (0x7f & b) << offset;
+      offset += 7;
+    } while (b >= 0x80);
+    return ret;
   }
-}
-
-void RleDecoderV2::unrolledUnpack24(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 3;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint32_t b0, b1, b2;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      buffer += 3;
-      data[curIdx++] = static_cast<int64_t>((b0 << 16) | (b1 << 8) | b2);
+
+  struct UnpackDynamicFunction {
+    using FunctionType = decltype(&BitUnpack::readLongs);
+
+    static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+#if defined(ORC_HAVE_RUNTIME_AVX512)
+      return {{DispatchLevel::NONE, BitUnpackDefault::readLongs},
+              {DispatchLevel::AVX512, BitUnpackAVX512::readLongs}};
+#else
+      return {{DispatchLevel::NONE, BitUnpackDefault::readLongs}};
+#endif
     }
-    bufferStart += bufferNum * 3;
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 16) | (b1 << 8) | b2);
+  };
+
+  void RleDecoderV2::readLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs) {
+    static DynamicDispatch<UnpackDynamicFunction> dispatch;
+    return dispatch.func(this, data, offset, len, fbs);
   }
-}
-
-void RleDecoderV2::unrolledUnpack32(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 4;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint32_t b0, b1, b2, b3;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      b3 = static_cast<uint32_t>(*(buffer + 3));
-      buffer += 4;
-      data[curIdx++] = static_cast<int64_t>((b0 << 24) | (b1 << 16) | (b2 << 8) | b3);
-    }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    b3 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 24) | (b1 << 16) | (b2 << 8) | b3);
+
+  RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input, bool _isSigned,
+                             MemoryPool& pool, ReaderMetrics* _metrics)
+      : RleDecoder(_metrics),
+        inputStream(std::move(input)),
+        isSigned(_isSigned),
+        firstByte(0),
+        bufferStart(nullptr),
+        bufferEnd(bufferStart),
+        runLength(0),
+        runRead(0),
+        bitsLeft(0),
+        curByte(0),
+        unpackedPatch(pool, 0),
+        literals(pool, MAX_LITERAL_SIZE) {
+    // PASS
   }
-}
-
-void RleDecoderV2::unrolledUnpack40(int64_t* data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 5;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint64_t b0, b1, b2, b3, b4;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      b3 = static_cast<uint32_t>(*(buffer + 3));
-      b4 = static_cast<uint32_t>(*(buffer + 4));
-      buffer += 5;
-      data[curIdx++] = static_cast<int64_t>((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
-    }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    b3 = readByte();
-    b4 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
+
+  void RleDecoderV2::seek(PositionProvider& location) {
+    // move the input stream
+    inputStream->seek(location);
+    // clear state
+    bufferEnd = bufferStart = nullptr;
+    runRead = runLength = 0;
+    // skip ahead the given number of records
+    skip(location.next());
   }
-}
-
-void RleDecoderV2::unrolledUnpack48(int64_t *data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 6;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint64_t b0, b1, b2, b3, b4, b5;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      b3 = static_cast<uint32_t>(*(buffer + 3));
-      b4 = static_cast<uint32_t>(*(buffer + 4));
-      b5 = static_cast<uint32_t>(*(buffer + 5));
-      buffer += 6;
-      data[curIdx++] = static_cast<int64_t>((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) | (b4 << 8) | b5);
+
+  void RleDecoderV2::skip(uint64_t numValues) {
+    // simple for now, until perf tests indicate something encoding specific is
+    // needed
+    const uint64_t N = 64;
+    int64_t dummy[N];
+
+    while (numValues) {
+      uint64_t nRead = std::min(N, numValues);
+      next(dummy, nRead, nullptr);
+      numValues -= nRead;
     }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    b3 = readByte();
-    b4 = readByte();
-    b5 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) | (b4 << 8) | b5);
   }
-}
-
-void RleDecoderV2::unrolledUnpack56(int64_t *data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 7;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint64_t b0, b1, b2, b3, b4, b5, b6;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      b3 = static_cast<uint32_t>(*(buffer + 3));
-      b4 = static_cast<uint32_t>(*(buffer + 4));
-      b5 = static_cast<uint32_t>(*(buffer + 5));
-      b6 = static_cast<uint32_t>(*(buffer + 6));
-      buffer += 7;
-      data[curIdx++] = static_cast<int64_t>((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) | (b4 << 16) | (b5 << 8) | b6);
+
+  template <typename T>
+  void RleDecoderV2::next(T* const data, const uint64_t numValues, const char* const notNull) {
+    SCOPED_STOPWATCH(metrics, DecodingLatencyUs, DecodingCall);
+    uint64_t nRead = 0;
+
+    while (nRead < numValues) {
+      // Skip any nulls before attempting to read first byte.
+      while (notNull && !notNull[nRead]) {
+        if (++nRead == numValues) {
+          return;  // ended with null values
+        }
+      }
+
+      if (runRead == runLength) {
+        resetRun();
+        firstByte = readByte();
+      }
+
+      uint64_t offset = nRead, length = numValues - nRead;
+
+      EncodingType enc = static_cast<EncodingType>((firstByte >> 6) & 0x03);
+      switch (static_cast<int64_t>(enc)) {
+        case SHORT_REPEAT:
+          nRead += nextShortRepeats(data, offset, length, notNull);
+          break;
+        case DIRECT:
+          nRead += nextDirect(data, offset, length, notNull);
+          break;
+        case PATCHED_BASE:
+          nRead += nextPatched(data, offset, length, notNull);
+          break;
+        case DELTA:
+          nRead += nextDelta(data, offset, length, notNull);
+          break;
+        default:
+          throw ParseError("unknown encoding");
+      }
     }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    b3 = readByte();
-    b4 = readByte();
-    b5 = readByte();
-    b6 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) | (b4 << 16) | (b5 << 8) | b6);
   }
-}
-
-void RleDecoderV2::unrolledUnpack64(int64_t *data, uint64_t offset, uint64_t len) {
-  uint64_t curIdx = offset;
-  while (curIdx < offset + len) {
-    // Exhaust the buffer
-    int64_t bufferNum = (bufferEnd - bufferStart) / 8;
-    bufferNum = std::min(bufferNum, static_cast<int64_t>(offset + len - curIdx));
-    uint64_t b0, b1, b2, b3, b4, b5, b6, b7;
-    // Avoid updating 'bufferStart' inside the loop.
-    const auto* buffer = reinterpret_cast<const unsigned char*>(bufferStart);
-    for (int i = 0; i < bufferNum; ++i) {
-      b0 = static_cast<uint32_t>(*buffer);
-      b1 = static_cast<uint32_t>(*(buffer + 1));
-      b2 = static_cast<uint32_t>(*(buffer + 2));
-      b3 = static_cast<uint32_t>(*(buffer + 3));
-      b4 = static_cast<uint32_t>(*(buffer + 4));
-      b5 = static_cast<uint32_t>(*(buffer + 5));
-      b6 = static_cast<uint32_t>(*(buffer + 6));
-      b7 = static_cast<uint32_t>(*(buffer + 7));
-      buffer += 8;
-      data[curIdx++] = static_cast<int64_t>((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | (b4 << 24) | (b5 << 16) | (b6 << 8) | b7);
-    }
-    bufferStart = reinterpret_cast<const char*>(buffer);
-    if (curIdx == offset + len) return;
-
-    // One of the following readByte() will update 'bufferStart' and 'bufferEnd'.
-    b0 = readByte();
-    b1 = readByte();
-    b2 = readByte();
-    b3 = readByte();
-    b4 = readByte();
-    b5 = readByte();
-    b6 = readByte();
-    b7 = readByte();
-    data[curIdx++] = static_cast<int64_t>((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | (b4 << 24) | (b5 << 16) | (b6 << 8) | b7);
+
+  void RleDecoderV2::next(int64_t* data, uint64_t numValues, const char* notNull) {
+    next<int64_t>(data, numValues, notNull);
   }
-}
-
-void RleDecoderV2::plainUnpackLongs(int64_t *data, uint64_t offset, uint64_t len,
-                                    uint64_t fbs) {
-  for (uint64_t i = offset; i < (offset + len); i++) {
-    uint64_t result = 0;
-    uint64_t bitsLeftToRead = fbs;
-    while (bitsLeftToRead > bitsLeft) {
-      result <<= bitsLeft;
-      result |= curByte & ((1 << bitsLeft) - 1);
-      bitsLeftToRead -= bitsLeft;
-      curByte = readByte();
-      bitsLeft = 8;
-    }
 
-    // handle the left over bits
-    if (bitsLeftToRead > 0) {
-      result <<= bitsLeftToRead;
-      bitsLeft -= static_cast<uint32_t>(bitsLeftToRead);
-      result |= (curByte >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-    }
-    data[i] = static_cast<int64_t>(result);
+  void RleDecoderV2::next(int32_t* data, uint64_t numValues, const char* notNull) {
+    next<int32_t>(data, numValues, notNull);
   }
-}
-
-RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
-                           bool _isSigned, MemoryPool& pool
-                           ): inputStream(std::move(input)),
-                              isSigned(_isSigned),
-                              firstByte(0),
-                              runLength(0),
-                              runRead(0),
-                              bufferStart(nullptr),
-                              bufferEnd(bufferStart),
-                              bitsLeft(0),
-                              curByte(0),
-                              unpackedPatch(pool, 0),
-                              literals(pool, MAX_LITERAL_SIZE) {
-  // PASS
-}
-
-void RleDecoderV2::seek(PositionProvider& location) {
-  // move the input stream
-  inputStream->seek(location);
-  // clear state
-  bufferEnd = bufferStart = nullptr;
-  runRead = runLength = 0;
-  // skip ahead the given number of records
-  skip(location.next());
-}
-
-void RleDecoderV2::skip(uint64_t numValues) {
-  // simple for now, until perf tests indicate something encoding specific is
-  // needed
-  const uint64_t N = 64;
-  int64_t dummy[N];
-
-  while (numValues) {
-    uint64_t nRead = std::min(N, numValues);
-    next(dummy, nRead, nullptr);
-    numValues -= nRead;
+
+  void RleDecoderV2::next(int16_t* data, uint64_t numValues, const char* notNull) {
+    next<int16_t>(data, numValues, notNull);
   }
-}
-
-void RleDecoderV2::next(int64_t* const data,
-                        const uint64_t numValues,
-                        const char* const notNull) {
-  uint64_t nRead = 0;
-
-  while (nRead < numValues) {
-    // Skip any nulls before attempting to read first byte.
-    while (notNull && !notNull[nRead]) {
-      if (++nRead == numValues) {
-        return; // ended with null values
-      }
-    }
 
+  template <typename T>
+  uint64_t RleDecoderV2::nextShortRepeats(T* const data, uint64_t offset, uint64_t numValues,
+                                          const char* const notNull) {
     if (runRead == runLength) {
-      resetRun();
-      firstByte = readByte();
-    }
+      // extract the number of fixed bytes
+      uint64_t byteSize = (firstByte >> 3) & 0x07;
+      byteSize += 1;
 
-    uint64_t offset = nRead, length = numValues - nRead;
-
-    EncodingType enc = static_cast<EncodingType>
-        ((firstByte >> 6) & 0x03);
-    switch(static_cast<int64_t>(enc)) {
-    case SHORT_REPEAT:
-      nRead += nextShortRepeats(data, offset, length, notNull);
-      break;
-    case DIRECT:
-      nRead += nextDirect(data, offset, length, notNull);
-      break;
-    case PATCHED_BASE:
-      nRead += nextPatched(data, offset, length, notNull);
-      break;
-    case DELTA:
-      nRead += nextDelta(data, offset, length, notNull);
-      break;
-    default:
-      throw ParseError("unknown encoding");
-    }
-  }
-}
-
-uint64_t RleDecoderV2::nextShortRepeats(int64_t* const data,
-                                        uint64_t offset,
-                                        uint64_t numValues,
-                                        const char* const notNull) {
-  if (runRead == runLength) {
-    // extract the number of fixed bytes
-    uint64_t byteSize = (firstByte >> 3) & 0x07;
-    byteSize += 1;
-
-    runLength = firstByte & 0x07;
-    // run lengths values are stored only after MIN_REPEAT value is met
-    runLength += MIN_REPEAT;
-    runRead = 0;
-
-    // read the repeated value which is store using fixed bytes
-    literals[0] = readLongBE(byteSize);
-
-    if (isSigned) {
-      literals[0] = unZigZag(static_cast<uint64_t>(literals[0]));
+      runLength = firstByte & 0x07;
+      // run lengths values are stored only after MIN_REPEAT value is met
+      runLength += MIN_REPEAT;
+      runRead = 0;
+
+      // read the repeated value which is store using fixed bytes
+      literals[0] = readLongBE(byteSize);
+
+      if (isSigned) {
+        literals[0] = unZigZag(static_cast<uint64_t>(literals[0]));
+      }
     }
-  }
 
-  uint64_t nRead = std::min(runLength - runRead, numValues);
+    uint64_t nRead = std::min(runLength - runRead, numValues);
 
-  if (notNull) {
-    for(uint64_t pos = offset; pos < offset + nRead; ++pos) {
-      if (notNull[pos]) {
-        data[pos] = literals[0];
+    if (notNull) {
+      for (uint64_t pos = offset; pos < offset + nRead; ++pos) {
+        if (notNull[pos]) {
+          data[pos] = static_cast<T>(literals[0]);
+          ++runRead;
+        }
+      }
+    } else {
+      for (uint64_t pos = offset; pos < offset + nRead; ++pos) {
+        data[pos] = static_cast<T>(literals[0]);
         ++runRead;
       }
     }
-  } else {
-    for(uint64_t pos = offset; pos < offset + nRead; ++pos) {
-      data[pos] = literals[0];
-      ++runRead;
-    }
+
+    return nRead;
   }
 
-  return nRead;
-}
-
-uint64_t RleDecoderV2::nextDirect(int64_t* const data,
-                                  uint64_t offset,
-                                  uint64_t numValues,
-                                  const char* const notNull) {
-  if (runRead == runLength) {
-    // extract the number of fixed bits
-    unsigned char fbo = (firstByte >> 1) & 0x1f;
-    uint32_t bitSize = decodeBitWidth(fbo);
-
-    // extract the run length
-    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
-    runLength |= readByte();
-    // runs are one off
-    runLength += 1;
-    runRead = 0;
-
-    readLongs(literals.data(), 0, runLength, bitSize);
-    if (isSigned) {
-      for (uint64_t i = 0; i < runLength; ++i) {
-        literals[i] = unZigZag(static_cast<uint64_t>(literals[i]));
+  template <typename T>
+  uint64_t RleDecoderV2::nextDirect(T* const data, uint64_t offset, uint64_t numValues,
+                                    const char* const notNull) {
+    if (runRead == runLength) {
+      // extract the number of fixed bits
+      unsigned char fbo = (firstByte >> 1) & 0x1f;
+      uint32_t bitSize = decodeBitWidth(fbo);
+
+      // extract the run length
+      runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+      runLength |= readByte();
+      // runs are one off
+      runLength += 1;
+      runRead = 0;
+
+      readLongs(literals.data(), 0, runLength, bitSize);
+      if (isSigned) {
+        for (uint64_t i = 0; i < runLength; ++i) {
+          literals[i] = unZigZag(static_cast<uint64_t>(literals[i]));
+        }
       }
     }
+
+    return copyDataFromBuffer(data, offset, numValues, notNull);
   }
 
-  return copyDataFromBuffer(data, offset, numValues, notNull);
-}
-
-void RleDecoderV2::adjustGapAndPatch(uint32_t patchBitSize, int64_t patchMask,
-                                     int64_t* resGap, int64_t* resPatch,
-                                     uint64_t* patchIdx) {
-  uint64_t idx = *patchIdx;
-  uint64_t gap = static_cast<uint64_t>(unpackedPatch[idx]) >> patchBitSize;
-  int64_t patch = unpackedPatch[idx] & patchMask;
-  int64_t actualGap = 0;
-
-  // special case: gap is >255 then patch value will be 0.
-  // if gap is <=255 then patch value cannot be 0
-  while (gap == 255 && patch == 0) {
-    actualGap += 255;
-    ++idx;
-    gap = static_cast<uint64_t>(unpackedPatch[idx]) >> patchBitSize;
-    patch = unpackedPatch[idx] & patchMask;
+  void RleDecoderV2::adjustGapAndPatch(uint32_t patchBitSize, int64_t patchMask, int64_t* resGap,
+                                       int64_t* resPatch, uint64_t* patchIdx) {
+    uint64_t idx = *patchIdx;
+    uint64_t gap = static_cast<uint64_t>(unpackedPatch[idx]) >> patchBitSize;
+    int64_t patch = unpackedPatch[idx] & patchMask;
+    int64_t actualGap = 0;
+
+    // special case: gap is >255 then patch value will be 0.
+    // if gap is <=255 then patch value cannot be 0
+    while (gap == 255 && patch == 0) {
+      actualGap += 255;
+      ++idx;
+      gap = static_cast<uint64_t>(unpackedPatch[idx]) >> patchBitSize;
+      patch = unpackedPatch[idx] & patchMask;
+    }
+    // add the left over gap
+    actualGap += gap;
+
+    *resGap = actualGap;
+    *resPatch = patch;
+    *patchIdx = idx;
   }
-  // add the left over gap
-  actualGap += gap;
 
-  *resGap = actualGap;
-  *resPatch = patch;
-  *patchIdx = idx;
-}
+  template <typename T>
+  uint64_t RleDecoderV2::nextPatched(T* const data, uint64_t offset, uint64_t numValues,
+                                     const char* const notNull) {
+    if (runRead == runLength) {
+      // extract the number of fixed bits
+      unsigned char fbo = (firstByte >> 1) & 0x1f;
+      uint32_t bitSize = decodeBitWidth(fbo);
+
+      // extract the run length
+      runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+      runLength |= readByte();
+      // runs are one off
+      runLength += 1;
+      runRead = 0;
+
+      // extract the number of bytes occupied by base
+      uint64_t thirdByte = readByte();
+      uint64_t byteSize = (thirdByte >> 5) & 0x07;
+      // base width is one off
+      byteSize += 1;
+
+      // extract patch width
+      uint32_t pwo = thirdByte & 0x1f;
+      uint32_t patchBitSize = decodeBitWidth(pwo);
+
+      // read fourth byte and extract patch gap width
+      uint64_t fourthByte = readByte();
+      uint32_t pgw = (fourthByte >> 5) & 0x07;
+      // patch gap width is one off
+      pgw += 1;
+
+      // extract the length of the patch list
+      size_t pl = fourthByte & 0x1f;
+      if (pl == 0) {
+        throw ParseError("Corrupt PATCHED_BASE encoded data (pl==0)!");
+      }
 
-uint64_t RleDecoderV2::nextPatched(int64_t* const data,
-                                   uint64_t offset,
-                                   uint64_t numValues,
-                                   const char* const notNull) {
-  if (runRead == runLength) {
-    // extract the number of fixed bits
-    unsigned char fbo = (firstByte >> 1) & 0x1f;
-    uint32_t bitSize = decodeBitWidth(fbo);
-
-    // extract the run length
-    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
-    runLength |= readByte();
-    // runs are one off
-    runLength += 1;
-    runRead = 0;
-
-    // extract the number of bytes occupied by base
-    uint64_t thirdByte = readByte();
-    uint64_t byteSize = (thirdByte >> 5) & 0x07;
-    // base width is one off
-    byteSize += 1;
-
-    // extract patch width
-    uint32_t pwo = thirdByte & 0x1f;
-    uint32_t patchBitSize = decodeBitWidth(pwo);
-
-    // read fourth byte and extract patch gap width
-    uint64_t fourthByte = readByte();
-    uint32_t pgw = (fourthByte >> 5) & 0x07;
-    // patch gap width is one off
-    pgw += 1;
-
-    // extract the length of the patch list
-    size_t pl = fourthByte & 0x1f;
-    if (pl == 0) {
-      throw ParseError("Corrupt PATCHED_BASE encoded data (pl==0)!");
-    }
+      // read the next base width number of bytes to extract base value
+      int64_t base = readLongBE(byteSize);
+      int64_t mask = (static_cast<int64_t>(1) << ((byteSize * 8) - 1));
+      // if mask of base value is 1 then base is negative value else positive
+      if ((base & mask) != 0) {
+        base = base & ~mask;
+        base = -base;
+      }
 
-    // read the next base width number of bytes to extract base value
-    int64_t base = readLongBE(byteSize);
-    int64_t mask = (static_cast<int64_t>(1) << ((byteSize * 8) - 1));
-    // if mask of base value is 1 then base is negative value else positive
-    if ((base & mask) != 0) {
-      base = base & ~mask;
-      base = -base;
-    }
+      readLongs(literals.data(), 0, runLength, bitSize);
+      // any remaining bits are thrown out
+      resetReadLongs();
+
+      // TODO: something more efficient than resize
+      unpackedPatch.resize(pl);
+      // TODO: Skip corrupt?
+      //    if ((patchBitSize + pgw) > 64 && !skipCorrupt) {
+      if ((patchBitSize + pgw) > 64) {
+        throw ParseError(
+            "Corrupt PATCHED_BASE encoded data "
+            "(patchBitSize + pgw > 64)!");
+      }
+      uint32_t cfb = getClosestFixedBits(patchBitSize + pgw);
+      readLongs(unpackedPatch.data(), 0, pl, cfb);
+      // any remaining bits are thrown out
+      resetReadLongs();
 
-    readLongs(literals.data(), 0, runLength, bitSize);
-    // any remaining bits are thrown out
-    resetReadLongs();
-
-    // TODO: something more efficient than resize
-    unpackedPatch.resize(pl);
-    // TODO: Skip corrupt?
-    //    if ((patchBitSize + pgw) > 64 && !skipCorrupt) {
-    if ((patchBitSize + pgw) > 64) {
-      throw ParseError("Corrupt PATCHED_BASE encoded data "
-                       "(patchBitSize + pgw > 64)!");
-    }
-    uint32_t cfb = getClosestFixedBits(patchBitSize + pgw);
-    readLongs(unpackedPatch.data(), 0, pl, cfb);
-    // any remaining bits are thrown out
-    resetReadLongs();
-
-    // apply the patch directly when decoding the packed data
-    int64_t patchMask = ((static_cast<int64_t>(1) << patchBitSize) - 1);
-
-    int64_t gap = 0;
-    int64_t patch = 0;
-    uint64_t patchIdx = 0;
-    adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch, &patchIdx);
-
-    for (uint64_t i = 0; i < runLength; ++i) {
-      if (static_cast<int64_t>(i) != gap) {
-        // no patching required. add base to unpacked value to get final value
-        literals[i] += base;
-      } else {
-        // extract the patch value
-        int64_t patchedVal = literals[i] | (patch << bitSize);
+      // apply the patch directly when decoding the packed data
+      int64_t patchMask = ((static_cast<int64_t>(1) << patchBitSize) - 1);
+
+      int64_t gap = 0;
+      int64_t patch = 0;
+      uint64_t patchIdx = 0;
+      adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch, &patchIdx);
+
+      for (uint64_t i = 0; i < runLength; ++i) {
+        if (static_cast<int64_t>(i) != gap) {
+          // no patching required. add base to unpacked value to get final value
+          literals[i] += base;
+        } else {
+          // extract the patch value
+          int64_t patchedVal = literals[i] | (patch << bitSize);
 
-        // add base to patched value
-        literals[i] = base + patchedVal;
+          // add base to patched value
+          literals[i] = base + patchedVal;
 
-        // increment the patch to point to next entry in patch list
-        ++patchIdx;
+          // increment the patch to point to next entry in patch list
+          ++patchIdx;
 
-        if (patchIdx < unpackedPatch.size()) {
-          adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch,
-                            &patchIdx);
+          if (patchIdx < unpackedPatch.size()) {
+            adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch, &patchIdx);
 
-          // next gap is relative to the current gap
-          gap += i;
+            // next gap is relative to the current gap
+            gap += i;
+          }
         }
       }
     }
+
+    return copyDataFromBuffer(data, offset, numValues, notNull);
   }
 
-  return copyDataFromBuffer(data, offset, numValues, notNull);
-}
-
-uint64_t RleDecoderV2::nextDelta(int64_t* const data,
-                                 uint64_t offset,
-                                 uint64_t numValues,
-                                 const char* const notNull) {
-  if (runRead == runLength) {
-    // extract the number of fixed bits
-    unsigned char fbo = (firstByte >> 1) & 0x1f;
-    uint32_t bitSize;
-    if (fbo != 0) {
-      bitSize = decodeBitWidth(fbo);
-    } else {
-      bitSize = 0;
-    }
+  template <typename T>
+  uint64_t RleDecoderV2::nextDelta(T* const data, uint64_t offset, uint64_t numValues,
+                                   const char* const notNull) {
+    if (runRead == runLength) {
+      // extract the number of fixed bits
+      unsigned char fbo = (firstByte >> 1) & 0x1f;
+      uint32_t bitSize;
+      if (fbo != 0) {
+        bitSize = decodeBitWidth(fbo);
+      } else {
+        bitSize = 0;
+      }
 
-    // extract the run length
-    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
-    runLength |= readByte();
-    ++runLength; // account for first value
-    runRead = 0;
+      // extract the run length
+      runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+      runLength |= readByte();
+      ++runLength;  // account for first value
+      runRead = 0;
 
-    int64_t prevValue;
-    // read the first value stored as vint
-    if (isSigned) {
-      prevValue = readVslong();
-    } else {
-      prevValue = static_cast<int64_t>(readVulong());
-    }
+      int64_t prevValue;
+      // read the first value stored as vint
+      if (isSigned) {
+        prevValue = readVslong();
+      } else {
+        prevValue = static_cast<int64_t>(readVulong());
+      }
 
-    literals[0] = prevValue;
+      literals[0] = prevValue;
 
-    // read the fixed delta value stored as vint (deltas can be negative even
-    // if all number are positive)
-    int64_t deltaBase = readVslong();
+      // read the fixed delta value stored as vint (deltas can be negative even
+      // if all number are positive)
+      int64_t deltaBase = readVslong();
 
-    if (bitSize == 0) {
-      // add fixed deltas to adjacent values
-      for (uint64_t i = 1; i < runLength; ++i) {
-        literals[i] = literals[i - 1] + deltaBase;
-      }
-    } else {
-      prevValue = literals[1] = prevValue + deltaBase;
-      if (runLength < 2) {
-        std::stringstream ss;
-        ss << "Illegal run length for delta encoding: " << runLength;
-        throw ParseError(ss.str());
-      }
-      // write the unpacked values, add it to previous value and store final
-      // value to result buffer. if the delta base value is negative then it
-      // is a decreasing sequence else an increasing sequence.
-      // read deltas using the literals buffer.
-      readLongs(literals.data(), 2, runLength - 2, bitSize);
-      if (deltaBase < 0) {
-        for (uint64_t i = 2; i < runLength; ++i) {
-          prevValue = literals[i] = prevValue - literals[i];
+      if (bitSize == 0) {
+        // add fixed deltas to adjacent values
+        for (uint64_t i = 1; i < runLength; ++i) {
+          literals[i] = literals[i - 1] + deltaBase;
         }
       } else {
-        for (uint64_t i = 2; i < runLength; ++i) {
-          prevValue = literals[i] = prevValue + literals[i];
+        prevValue = literals[1] = prevValue + deltaBase;
+        if (runLength < 2) {
+          std::stringstream ss;
+          ss << "Illegal run length for delta encoding: " << runLength;
+          throw ParseError(ss.str());
+        }
+        // write the unpacked values, add it to previous value and store final
+        // value to result buffer. if the delta base value is negative then it
+        // is a decreasing sequence else an increasing sequence.
+        // read deltas using the literals buffer.
+        readLongs(literals.data(), 2, runLength - 2, bitSize);
+        if (deltaBase < 0) {
+          for (uint64_t i = 2; i < runLength; ++i) {
+            prevValue = literals[i] = prevValue - literals[i];
+          }
+        } else {
+          for (uint64_t i = 2; i < runLength; ++i) {
+            prevValue = literals[i] = prevValue + literals[i];
+          }
         }
       }
     }
-  }
 
-  return copyDataFromBuffer(data, offset, numValues, notNull);
-}
+    return copyDataFromBuffer(data, offset, numValues, notNull);
+  }
 
-uint64_t RleDecoderV2::copyDataFromBuffer(int64_t* data, uint64_t offset,
-                                          uint64_t numValues, const char* notNull) {
-  uint64_t nRead = std::min(runLength - runRead, numValues);
-  if (notNull) {
-    for (uint64_t i = offset; i < (offset + nRead); ++i) {
-      if (notNull[i]) {
-        data[i] = literals[runRead++];
+  template <typename T>
+  uint64_t RleDecoderV2::copyDataFromBuffer(T* data, uint64_t offset, uint64_t numValues,
+                                            const char* notNull) {
+    uint64_t nRead = std::min(runLength - runRead, numValues);
+    if (notNull) {
+      for (uint64_t i = offset; i < (offset + nRead); ++i) {
+        if (notNull[i]) {
+          data[i] = static_cast<T>(literals[runRead++]);
+        }
+      }
+    } else {
+      for (uint64_t i = offset; i < (offset + nRead); ++i) {
+        data[i] = static_cast<T>(literals[runRead++]);
       }
     }
-  } else {
-    memcpy(data + offset, literals.data() + runRead, nRead * sizeof(int64_t));
-    runRead += nRead;
+    return nRead;
   }
-  return nRead;
-}
 
 }  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc b/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc
index 4e7a145a5a..a75aeac2eb 100644
--- a/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc
+++ b/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc
@@ -1,133 +1,135 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
- * distributed with option work for additional information
- * regarding copyright ownership.  The ASF licenses option file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use option file except in compliance
+ * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #include "Adaptor.hh"
 #include "Compression.hh"
-#include "RLEv2.hh"
 #include "RLEV2Util.hh"
+#include "RLEv2.hh"
 
 #define MAX_SHORT_REPEAT_LENGTH 10
 
 namespace orc {
 
-/**
- * Compute the bits required to represent pth percentile value
- * @param data - array
- * @param p - percentile value (>=0.0 to <=1.0)
- * @return pth percentile bits
- */
-uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist) {
+  /**
+   * Compute the bits required to represent pth percentile value
+   * @param data - array
+   * @param p - percentile value (>=0.0 to <=1.0)
+   * @return pth percentile bits
+   */
+  uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t length, double p,
+                                        bool reuseHist) {
     if ((p > 1.0) || (p <= 0.0)) {
-        throw InvalidArgument("Invalid p value: " + to_string(p));
+      throw InvalidArgument("Invalid p value: " + to_string(p));
     }
 
     if (!reuseHist) {
-        // histogram that store the encoded bit requirement for each values.
-        // maximum number of bits that can encoded is 32 (refer FixedBitSizes)
-        memset(histgram, 0, FixedBitSizes::SIZE * sizeof(int32_t));
-        // compute the histogram
-        for(size_t i = offset; i < (offset + length); i++) {
-            uint32_t idx = encodeBitWidth(findClosestNumBits(data[i]));
-            histgram[idx] += 1;
-        }
+      // histogram that store the encoded bit requirement for each values.
+      // maximum number of bits that can encoded is 32 (refer FixedBitSizes)
+      memset(histgram, 0, FixedBitSizes::SIZE * sizeof(int32_t));
+      // compute the histogram
+      for (size_t i = offset; i < (offset + length); i++) {
+        uint32_t idx = encodeBitWidth(findClosestNumBits(data[i]));
+        histgram[idx] += 1;
+      }
     }
 
     int32_t perLen = static_cast<int32_t>(static_cast<double>(length) * (1.0 - p));
 
     // return the bits required by pth percentile length
-    for(int32_t i = HIST_LEN - 1; i >= 0; i--) {
-        perLen -= histgram[i];
-        if (perLen < 0) {
-            return decodeBitWidth(static_cast<uint32_t>(i));
-        }
+    for (int32_t i = HIST_LEN - 1; i >= 0; i--) {
+      perLen -= histgram[i];
+      if (perLen < 0) {
+        return decodeBitWidth(static_cast<uint32_t>(i));
+      }
     }
     return 0;
-}
+  }
 
-RleEncoderV2::RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream,
-                           bool hasSigned, bool alignBitPacking) :
-        RleEncoder(std::move(outStream), hasSigned),
+  RleEncoderV2::RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned,
+                             bool alignBitPacking)
+      : RleEncoder(std::move(outStream), hasSigned),
         alignedBitPacking(alignBitPacking),
-        prevDelta(0){
+        prevDelta(0) {
     literals = new int64_t[MAX_LITERAL_SIZE];
     gapVsPatchList = new int64_t[MAX_LITERAL_SIZE];
     zigzagLiterals = hasSigned ? new int64_t[MAX_LITERAL_SIZE] : nullptr;
     baseRedLiterals = new int64_t[MAX_LITERAL_SIZE];
     adjDeltas = new int64_t[MAX_LITERAL_SIZE];
-}
+  }
 
-void RleEncoderV2::write(int64_t val) {
-    if(numLiterals == 0) {
-        initializeLiterals(val);
-        return;
+  void RleEncoderV2::write(int64_t val) {
+    if (numLiterals == 0) {
+      initializeLiterals(val);
+      return;
     }
 
-    if(numLiterals == 1) {
-        prevDelta = val - literals[0];
-        literals[numLiterals++] = val;
+    if (numLiterals == 1) {
+      prevDelta = val - literals[0];
+      literals[numLiterals++] = val;
 
-        if(val == literals[0]) {
-            fixedRunLength = 2;
-            variableRunLength = 0;
-        } else {
-            fixedRunLength = 0;
-            variableRunLength = 2;
-        }
-        return;
+      if (val == literals[0]) {
+        fixedRunLength = 2;
+        variableRunLength = 0;
+      } else {
+        fixedRunLength = 0;
+        variableRunLength = 2;
+      }
+      return;
     }
 
     int64_t currentDelta = val - literals[numLiterals - 1];
     EncodingOption option = {};
     if (prevDelta == 0 && currentDelta == 0) {
-        // case 1: fixed delta run
-        literals[numLiterals++] = val;
-
-        if (variableRunLength > 0) {
-            // if variable run is non-zero then we are seeing repeating
-            // values at the end of variable run in which case fixed Run
-            // length is 2
-            fixedRunLength = 2;
-        }
-        fixedRunLength++;
-
-        // if fixed run met the minimum condition and if variable
-        // run is non-zero then flush the variable run and shift the
-        // tail fixed runs to start of the buffer
-        if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
-            numLiterals -= MIN_REPEAT;
-            variableRunLength -= (MIN_REPEAT - 1);
-
-            determineEncoding(option);
-            writeValues(option);
-
-            // shift tail fixed runs to beginning of the buffer
-            for (size_t i = 0; i < MIN_REPEAT; ++i) {
-                literals[i] = val;
-            }
-            numLiterals = MIN_REPEAT;
-        }
+      // case 1: fixed delta run
+      literals[numLiterals++] = val;
+
+      if (variableRunLength > 0) {
+        // if variable run is non-zero then we are seeing repeating
+        // values at the end of variable run in which case fixed Run
+        // length is 2
+        fixedRunLength = 2;
+      }
+      fixedRunLength++;
+
+      // if fixed run met the minimum condition and if variable
+      // run is non-zero then flush the variable run and shift the
+      // tail fixed runs to start of the buffer
+      if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
+        numLiterals -= MIN_REPEAT;
+        variableRunLength -= (MIN_REPEAT - 1);
+
+        determineEncoding(option);
+        writeValues(option);
 
-        if (fixedRunLength == MAX_LITERAL_SIZE) {
-            option.encoding = DELTA;
-            option.isFixedDelta = true;
-            writeValues(option);
+        // shift tail fixed runs to beginning of the buffer
+        for (size_t i = 0; i < MIN_REPEAT; ++i) {
+          literals[i] = val;
         }
-        return;
+        numLiterals = MIN_REPEAT;
+      }
+
+      if (fixedRunLength == MAX_LITERAL_SIZE) {
+        option.encoding = DELTA;
+        option.isFixedDelta = true;
+        writeValues(option);
+      }
+      return;
     }
 
     // case 2: variable delta run
@@ -136,45 +138,45 @@ void RleEncoderV2::write(int64_t val) {
     // short repeat conditions then write the values as short repeats
     // else use delta encoding
     if (fixedRunLength >= MIN_REPEAT) {
-        if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-            option.encoding = SHORT_REPEAT;
-        } else {
-            option.encoding = DELTA;
-            option.isFixedDelta = true;
-        }
-        writeValues(option);
+      if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
+        option.encoding = SHORT_REPEAT;
+      } else {
+        option.encoding = DELTA;
+        option.isFixedDelta = true;
+      }
+      writeValues(option);
     }
 
     // if fixed run length is <MIN_REPEAT and current value is
     // different from previous then treat it as variable run
     if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT && val != literals[numLiterals - 1]) {
-        variableRunLength = fixedRunLength;
-        fixedRunLength = 0;
+      variableRunLength = fixedRunLength;
+      fixedRunLength = 0;
     }
 
     // after writing values re-initialize the variables
     if (numLiterals == 0) {
-        initializeLiterals(val);
+      initializeLiterals(val);
     } else {
-        prevDelta = val - literals[numLiterals - 1];
-        literals[numLiterals++] = val;
-        variableRunLength++;
+      prevDelta = val - literals[numLiterals - 1];
+      literals[numLiterals++] = val;
+      variableRunLength++;
 
-        if (variableRunLength == MAX_LITERAL_SIZE) {
-            determineEncoding(option);
-            writeValues(option);
-        }
+      if (variableRunLength == MAX_LITERAL_SIZE) {
+        determineEncoding(option);
+        writeValues(option);
+      }
     }
-}
+  }
 
-void RleEncoderV2::computeZigZagLiterals(EncodingOption &option) {
-    assert (isSigned);
+  void RleEncoderV2::computeZigZagLiterals(EncodingOption& option) {
+    assert(isSigned);
     for (size_t i = 0; i < numLiterals; i++) {
-        zigzagLiterals[option.zigzagLiteralsCount++] = zigZag(literals[i]);
+      zigzagLiterals[option.zigzagLiteralsCount++] = zigZag(literals[i]);
     }
-}
+  }
 
-void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
+  void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
     // mask will be max value beyond which patch will be generated
     int64_t mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1;
 
@@ -190,9 +192,9 @@ void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
     // gap and patch together in a long. To make sure gap and patch can be
     // packed together adjust the patch width
     if (option.patchWidth == 64) {
-        option.patchWidth = 56;
-        option.brBits95p = 8;
-        mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1;
+      option.patchWidth = 56;
+      option.brBits95p = 8;
+      mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1;
     }
 
     uint32_t gapIdx = 0;
@@ -203,27 +205,27 @@ void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
     std::vector<int64_t> gapList;
     std::vector<int64_t> patchList;
 
-    for(size_t i = 0; i < numLiterals; i++) {
-        // if value is above mask then create the patch and record the gap
-        if (baseRedLiterals[i] > mask) {
-            size_t gap = i - prev;
-            if (gap > maxGap) {
-                maxGap = gap;
-            }
-
-            // gaps are relative, so store the previous patched value index
-            prev = i;
-            gapList.push_back(static_cast<int64_t>(gap));
-            gapIdx++;
-
-            // extract the most significant bits that are over mask bits
-            int64_t patch = baseRedLiterals[i] >> option.brBits95p;
-            patchList.push_back(patch);
-            patchIdx++;
-
-            // strip off the MSB to enable safe bit packing
-            baseRedLiterals[i] &= mask;
+    for (size_t i = 0; i < numLiterals; i++) {
+      // if value is above mask then create the patch and record the gap
+      if (baseRedLiterals[i] > mask) {
+        size_t gap = i - prev;
+        if (gap > maxGap) {
+          maxGap = gap;
         }
+
+        // gaps are relative, so store the previous patched value index
+        prev = i;
+        gapList.push_back(static_cast<int64_t>(gap));
+        gapIdx++;
+
+        // extract the most significant bits that are over mask bits
+        int64_t patch = baseRedLiterals[i] >> option.brBits95p;
+        patchList.push_back(patch);
+        patchIdx++;
+
+        // strip off the MSB to enable safe bit packing
+        baseRedLiterals[i] &= mask;
+      }
     }
 
     // adjust the patch length to number of entries in gap list
@@ -232,9 +234,9 @@ void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
     // if the element to be patched is the first and only element then
     // max gap will be 0, but to store the gap as 0 we need atleast 1 bit
     if (maxGap == 0 && option.patchLength != 0) {
-        option.patchGapWidth = 1;
+      option.patchGapWidth = 1;
     } else {
-        option.patchGapWidth = findClosestNumBits(static_cast<int64_t>(maxGap));
+      option.patchGapWidth = findClosestNumBits(static_cast<int64_t>(maxGap));
     }
 
     // special case: if the patch gap width is greater than 256, then
@@ -250,58 +252,58 @@ void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
     // 255 gap width => 0 for patch value
     // 1 gap width => actual patch value
     if (option.patchGapWidth > 8) {
-        option.patchGapWidth = 8;
-        // for gap = 511, we need two additional entries in patch list
-        if (maxGap == 511) {
-            option.patchLength += 2;
-        } else {
-            option.patchLength += 1;
-        }
+      option.patchGapWidth = 8;
+      // for gap = 511, we need two additional entries in patch list
+      if (maxGap == 511) {
+        option.patchLength += 2;
+      } else {
+        option.patchLength += 1;
+      }
     }
 
     // create gap vs patch list
     gapIdx = 0;
     patchIdx = 0;
-    for(size_t i = 0; i < option.patchLength; i++) {
-        int64_t g = gapList[gapIdx++];
-        int64_t p = patchList[patchIdx++];
-        while (g > 255) {
-            gapVsPatchList[option.gapVsPatchListCount++] = (255L << option.patchWidth);
-            i++;
-            g -= 255;
-        }
+    for (size_t i = 0; i < option.patchLength; i++) {
+      int64_t g = gapList[gapIdx++];
+      int64_t p = patchList[patchIdx++];
+      while (g > 255) {
+        gapVsPatchList[option.gapVsPatchListCount++] = (255L << option.patchWidth);
+        i++;
+        g -= 255;
+      }
 
-        // store patch value in LSBs and gap in MSBs
-        gapVsPatchList[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p);
+      // store patch value in LSBs and gap in MSBs
+      gapVsPatchList[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p);
     }
-}
+  }
 
-/**
- * Prepare for Direct or PatchedBase encoding
- * compute zigZagLiterals and zzBits100p (Max number of encoding bits required)
- * @return zigzagLiterals
- */
-int64_t* RleEncoderV2::prepareForDirectOrPatchedBase(EncodingOption& option) {
+  /**
+   * Prepare for Direct or PatchedBase encoding
+   * compute zigZagLiterals and zzBits100p (Max number of encoding bits required)
+   * @return zigzagLiterals
+   */
+  int64_t* RleEncoderV2::prepareForDirectOrPatchedBase(EncodingOption& option) {
     if (isSigned) {
-        computeZigZagLiterals(option);
+      computeZigZagLiterals(option);
     }
     int64_t* currentZigzagLiterals = isSigned ? zigzagLiterals : literals;
     option.zzBits100p = percentileBits(currentZigzagLiterals, 0, numLiterals, 1.0);
     return currentZigzagLiterals;
-}
+  }
 
-void RleEncoderV2::determineEncoding(EncodingOption& option) {
+  void RleEncoderV2::determineEncoding(EncodingOption& option) {
     // We need to compute zigzag values for DIRECT and PATCHED_BASE encodings,
     // but not for SHORT_REPEAT or DELTA. So we only perform the zigzag
     // computation when it's determined to be necessary.
 
     // not a big win for shorter runs to determine encoding
     if (numLiterals <= MIN_REPEAT) {
-        // we need to compute zigzag values for DIRECT encoding if we decide to
-        // break early for delta overflows or for shorter runs
-        prepareForDirectOrPatchedBase(option);
-        option.encoding = DIRECT;
-        return;
+      // we need to compute zigzag values for DIRECT encoding if we decide to
+      // break early for delta overflows or for shorter runs
+      prepareForDirectOrPatchedBase(option);
+      option.encoding = DIRECT;
+      return;
     }
 
     // DELTA encoding check
@@ -319,29 +321,29 @@ void RleEncoderV2::determineEncoding(EncodingOption& option) {
     adjDeltas[option.adjDeltasCount++] = initialDelta;
 
     for (size_t i = 1; i < numLiterals; i++) {
-        const int64_t l1 = literals[i];
-        const int64_t l0 = literals[i - 1];
-        currDelta = l1 - l0;
-        option.min = std::min(option.min, l1);
-        max = std::max(max, l1);
-
-        isIncreasing &= (l0 <= l1);
-        isDecreasing &= (l0 >= l1);
-
-        option.isFixedDelta &= (currDelta == initialDelta);
-        if (i > 1) {
-            adjDeltas[option.adjDeltasCount++] = std::abs(currDelta);
-            deltaMax = std::max(deltaMax, adjDeltas[i - 1]);
-        }
+      const int64_t l1 = literals[i];
+      const int64_t l0 = literals[i - 1];
+      currDelta = l1 - l0;
+      option.min = std::min(option.min, l1);
+      max = std::max(max, l1);
+
+      isIncreasing &= (l0 <= l1);
+      isDecreasing &= (l0 >= l1);
+
+      option.isFixedDelta &= (currDelta == initialDelta);
+      if (i > 1) {
+        adjDeltas[option.adjDeltasCount++] = std::abs(currDelta);
+        deltaMax = std::max(deltaMax, adjDeltas[i - 1]);
+      }
     }
 
     // it's faster to exit under delta overflow condition without checking for
     // PATCHED_BASE condition as encoding using DIRECT is faster and has less
     // overhead than PATCHED_BASE
     if (!isSafeSubtract(max, option.min)) {
-        prepareForDirectOrPatchedBase(option);
-        option.encoding = DIRECT;
-        return;
+      prepareForDirectOrPatchedBase(option);
+      option.encoding = DIRECT;
+      return;
     }
 
     // invariant - subtracting any number from any other in the literals after
@@ -350,42 +352,42 @@ void RleEncoderV2::determineEncoding(EncodingOption& option) {
     // if min is equal to max then the delta is 0, option condition happens for
     // fixed values run >10 which cannot be encoded with SHORT_REPEAT
     if (option.min == max) {
-        if (!option.isFixedDelta) {
-            throw InvalidArgument(to_string(option.min) + "==" +
-              to_string(max) + ", isFixedDelta cannot be false");
-        }
+      if (!option.isFixedDelta) {
+        throw InvalidArgument(to_string(option.min) + "==" + to_string(max) +
+                              ", isFixedDelta cannot be false");
+      }
 
-        if(currDelta != 0) {
-            throw InvalidArgument(to_string(option.min) + "==" +
-            to_string(max) + ", currDelta should be zero");
-        }
-        option.fixedDelta = 0;
-        option.encoding = DELTA;
-        return;
+      if (currDelta != 0) {
+        throw InvalidArgument(to_string(option.min) + "==" + to_string(max) +
+                              ", currDelta should be zero");
+      }
+      option.fixedDelta = 0;
+      option.encoding = DELTA;
+      return;
     }
 
     if (option.isFixedDelta) {
-        if (currDelta != initialDelta) {
-            throw InvalidArgument("currDelta should be equal to initialDelta for fixed delta encoding");
-        }
+      if (currDelta != initialDelta) {
+        throw InvalidArgument("currDelta should be equal to initialDelta for fixed delta encoding");
+      }
 
-        option.encoding = DELTA;
-        option.fixedDelta = currDelta;
-        return;
+      option.encoding = DELTA;
+      option.fixedDelta = currDelta;
+      return;
     }
 
     // if initialDelta is 0 then we cannot delta encode as we cannot identify
     // the sign of deltas (increasing or decreasing)
     if (initialDelta != 0) {
-        // stores the number of bits required for packing delta blob in
-        // delta encoding
-        option.bitsDeltaMax = findClosestNumBits(deltaMax);
-
-        // monotonic condition
-        if (isIncreasing || isDecreasing) {
-            option.encoding = DELTA;
-            return;
-        }
+      // stores the number of bits required for packing delta blob in
+      // delta encoding
+      option.bitsDeltaMax = findClosestNumBits(deltaMax);
+
+      // monotonic condition
+      if (isIncreasing || isDecreasing) {
+        option.encoding = DELTA;
+        return;
+      }
     }
 
     // PATCHED_BASE encoding check
@@ -402,106 +404,105 @@ void RleEncoderV2::determineEncoding(EncodingOption& option) {
     // if the difference between 90th percentile and 100th percentile fixed
     // bits is > 1 then we need patch the values
     if (diffBitsLH > 1) {
+      // patching is done only on base reduced values.
+      // remove base from literals
+      for (size_t i = 0; i < numLiterals; i++) {
+        baseRedLiterals[option.baseRedLiteralsCount++] = (literals[i] - option.min);
+      }
 
-        // patching is done only on base reduced values.
-        // remove base from literals
-        for (size_t i = 0; i < numLiterals; i++) {
-            baseRedLiterals[option.baseRedLiteralsCount++] = (literals[i] - option.min);
-        }
-
-        // 95th percentile width is used to determine max allowed value
-        // after which patching will be done
-        option.brBits95p = percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
-
-        // 100th percentile is used to compute the max patch width
-        option.brBits100p = percentileBits(baseRedLiterals, 0, numLiterals, 1.0, true);
-
-        // after base reducing the values, if the difference in bits between
-        // 95th percentile and 100th percentile value is zero then there
-        // is no point in patching the values, in which case we will
-        // fallback to DIRECT encoding.
-        // The decision to use patched base was based on zigzag values, but the
-        // actual patching is done on base reduced literals.
-        if ((option.brBits100p - option.brBits95p) != 0) {
-            option.encoding = PATCHED_BASE;
-            preparePatchedBlob(option);
-            return;
-        } else {
-            option.encoding = DIRECT;
-            return;
-        }
-    } else {
-        // if difference in bits between 95th percentile and 100th percentile is
-        // 0, then patch length will become 0. Hence we will fallback to direct
+      // 95th percentile width is used to determine max allowed value
+      // after which patching will be done
+      option.brBits95p = percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
+
+      // 100th percentile is used to compute the max patch width
+      option.brBits100p = percentileBits(baseRedLiterals, 0, numLiterals, 1.0, true);
+
+      // after base reducing the values, if the difference in bits between
+      // 95th percentile and 100th percentile value is zero then there
+      // is no point in patching the values, in which case we will
+      // fallback to DIRECT encoding.
+      // The decision to use patched base was based on zigzag values, but the
+      // actual patching is done on base reduced literals.
+      if ((option.brBits100p - option.brBits95p) != 0) {
+        option.encoding = PATCHED_BASE;
+        preparePatchedBlob(option);
+        return;
+      } else {
         option.encoding = DIRECT;
         return;
+      }
+    } else {
+      // if difference in bits between 95th percentile and 100th percentile is
+      // 0, then patch length will become 0. Hence we will fallback to direct
+      option.encoding = DIRECT;
+      return;
     }
-}
+  }
 
-uint64_t RleEncoderV2::flush() {
+  uint64_t RleEncoderV2::flush() {
     if (numLiterals != 0) {
-        EncodingOption option = {};
-        if (variableRunLength != 0) {
-            determineEncoding(option);
-            writeValues(option);
-        } else if (fixedRunLength != 0) {
-            if (fixedRunLength < MIN_REPEAT) {
-                variableRunLength = fixedRunLength;
-                fixedRunLength = 0;
-                determineEncoding(option);
-                writeValues(option);
-            } else if (fixedRunLength >= MIN_REPEAT
-                       && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-                option.encoding = SHORT_REPEAT;
-                writeValues(option);
-            } else {
-                option.encoding = DELTA;
-                option.isFixedDelta = true;
-                writeValues(option);
-            }
+      EncodingOption option = {};
+      if (variableRunLength != 0) {
+        determineEncoding(option);
+        writeValues(option);
+      } else if (fixedRunLength != 0) {
+        if (fixedRunLength < MIN_REPEAT) {
+          variableRunLength = fixedRunLength;
+          fixedRunLength = 0;
+          determineEncoding(option);
+          writeValues(option);
+        } else if (fixedRunLength >= MIN_REPEAT && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
+          option.encoding = SHORT_REPEAT;
+          writeValues(option);
+        } else {
+          option.encoding = DELTA;
+          option.isFixedDelta = true;
+          writeValues(option);
         }
+      }
     }
 
     outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition));
     uint64_t dataSize = outputStream->flush();
     bufferLength = bufferPosition = 0;
     return dataSize;
-}
+  }
 
-void RleEncoderV2::writeValues(EncodingOption& option) {
+  void RleEncoderV2::writeValues(EncodingOption& option) {
     if (numLiterals != 0) {
-        switch (option.encoding) {
-            case SHORT_REPEAT:
-                writeShortRepeatValues(option);
-                break;
-            case DIRECT:
-                writeDirectValues(option);
-                break;
-            case PATCHED_BASE:
-                writePatchedBasedValues(option);
-                break;
-            case DELTA:
-                writeDeltaValues(option);
-                break;
-            default:
-                throw NotImplementedYet("Not implemented yet");
-        }
+      switch (option.encoding) {
+        case SHORT_REPEAT:
+          writeShortRepeatValues(option);
+          break;
+        case DIRECT:
+          writeDirectValues(option);
+          break;
+        case PATCHED_BASE:
+          writePatchedBasedValues(option);
+          break;
+        case DELTA:
+          writeDeltaValues(option);
+          break;
+        default:
+          throw NotImplementedYet("Not implemented yet");
+      }
 
-        numLiterals = 0;
-        prevDelta = 0;
+      numLiterals = 0;
+      prevDelta = 0;
     }
-}
+  }
 
-void RleEncoderV2::writeShortRepeatValues(EncodingOption&) {
+  void RleEncoderV2::writeShortRepeatValues(EncodingOption&) {
     int64_t repeatVal;
     if (isSigned) {
-        repeatVal = zigZag(literals[0]);
+      repeatVal = zigZag(literals[0]);
     } else {
-        repeatVal = literals[0];
+      repeatVal = literals[0];
     }
 
     const uint32_t numBitsRepeatVal = findClosestNumBits(repeatVal);
-    const uint32_t numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? (numBitsRepeatVal >> 3) : ((numBitsRepeatVal >> 3) + 1);
+    const uint32_t numBytesRepeatVal =
+        numBitsRepeatVal % 8 == 0 ? (numBitsRepeatVal >> 3) : ((numBitsRepeatVal >> 3) + 1);
 
     uint32_t header = getOpCode(SHORT_REPEAT);
 
@@ -511,19 +512,19 @@ void RleEncoderV2::writeShortRepeatValues(EncodingOption&) {
 
     writeByte(static_cast<char>(header));
 
-    for(int32_t i = static_cast<int32_t>(numBytesRepeatVal - 1); i >= 0; i--) {
-        int64_t b = ((repeatVal >> (i * 8)) & 0xff);
-        writeByte(static_cast<char>(b));
+    for (int32_t i = static_cast<int32_t>(numBytesRepeatVal - 1); i >= 0; i--) {
+      int64_t b = ((repeatVal >> (i * 8)) & 0xff);
+      writeByte(static_cast<char>(b));
     }
 
     fixedRunLength = 0;
-}
+  }
 
-void RleEncoderV2::writeDirectValues(EncodingOption& option) {
+  void RleEncoderV2::writeDirectValues(EncodingOption& option) {
     // write the number of fixed bits required in next 5 bits
     uint32_t fb = option.zzBits100p;
     if (alignedBitPacking) {
-        fb = getClosestAlignedFixedBits(fb);
+      fb = getClosestAlignedFixedBits(fb);
     }
 
     const uint32_t efb = encodeBitWidth(fb) << 1;
@@ -550,9 +551,9 @@ void RleEncoderV2::writeDirectValues(EncodingOption& option) {
 
     // reset run length
     variableRunLength = 0;
-}
+  }
 
-void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
+  void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
     // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding
     // because patch is applied to MSB bits. For example: If fixed bit width of
     // base value is 7 bits and if patch is 3 bits, the actual value is
@@ -578,7 +579,7 @@ void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
     // if the min value is negative toggle the sign
     const bool isNegative = (option.min < 0);
     if (isNegative) {
-        option.min = -option.min;
+      option.min = -option.min;
     }
 
     // find the number of bytes required for base and shift it by 5 bits
@@ -590,7 +591,7 @@ void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
 
     // if the base value is negative then set MSB to 1
     if (isNegative) {
-        option.min |= (1LL << ((baseBytes * 8) - 1));
+      option.min |= (1LL << ((baseBytes * 8) - 1));
     }
 
     // third byte contains 3 bits for number of bytes occupied by base
@@ -599,7 +600,8 @@ void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
 
     // fourth byte contains 3 bits for page gap width and 5 bits for
     // patch length
-    const char headerFourthByte = static_cast<char>((option.patchGapWidth - 1) << 5 | option.patchLength);
+    const char headerFourthByte =
+        static_cast<char>((option.patchGapWidth - 1) << 5 | option.patchLength);
 
     // write header
     writeByte(headerFirstByte);
@@ -608,9 +610,9 @@ void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
     writeByte(headerFourthByte);
 
     // write the base value using fixed bytes in big endian order
-    for(int32_t i = static_cast<int32_t>(baseBytes - 1); i >= 0; i--) {
-        char b = static_cast<char>(((option.min >> (i * 8)) & 0xff));
-        writeByte(b);
+    for (int32_t i = static_cast<int32_t>(baseBytes - 1); i >= 0; i--) {
+      char b = static_cast<char>(((option.min >> (i * 8)) & 0xff));
+      writeByte(b);
     }
 
     // base reduced literals are bit packed
@@ -625,39 +627,39 @@ void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) {
 
     // reset run length
     variableRunLength = 0;
-}
+  }
 
-void RleEncoderV2::writeDeltaValues(EncodingOption& option) {
+  void RleEncoderV2::writeDeltaValues(EncodingOption& option) {
     uint32_t len = 0;
     uint32_t fb = option.bitsDeltaMax;
     uint32_t efb = 0;
 
     if (alignedBitPacking) {
-        fb = getClosestAlignedFixedBits(fb);
+      fb = getClosestAlignedFixedBits(fb);
     }
 
     if (option.isFixedDelta) {
-        // if fixed run length is greater than threshold then it will be fixed
-        // delta sequence with delta value 0 else fixed delta sequence with
-        // non-zero delta value
-        if (fixedRunLength > MIN_REPEAT) {
-            // ex. sequence: 2 2 2 2 2 2 2 2
-            len = fixedRunLength - 1;
-            fixedRunLength = 0;
-        } else {
-            // ex. sequence: 4 6 8 10 12 14 16
-            len = variableRunLength - 1;
-            variableRunLength = 0;
-        }
-    } else {
-        // fixed width 0 is used for long repeating values.
-        // sequences that require only 1 bit to encode will have an additional bit
-        if (fb == 1) {
-            fb = 2;
-        }
-        efb = encodeBitWidth(fb) << 1;
+      // if fixed run length is greater than threshold then it will be fixed
+      // delta sequence with delta value 0 else fixed delta sequence with
+      // non-zero delta value
+      if (fixedRunLength > MIN_REPEAT) {
+        // ex. sequence: 2 2 2 2 2 2 2 2
+        len = fixedRunLength - 1;
+        fixedRunLength = 0;
+      } else {
+        // ex. sequence: 4 6 8 10 12 14 16
         len = variableRunLength - 1;
         variableRunLength = 0;
+      }
+    } else {
+      // fixed width 0 is used for long repeating values.
+      // sequences that require only 1 bit to encode will have an additional bit
+      if (fb == 1) {
+        fb = 2;
+      }
+      efb = encodeBitWidth(fb) << 1;
+      len = variableRunLength - 1;
+      variableRunLength = 0;
     }
 
     // extract the 9th bit of run length
@@ -675,106 +677,106 @@ void RleEncoderV2::writeDeltaValues(EncodingOption& option) {
 
     // store the first value from zigzag literal array
     if (isSigned) {
-        writeVslong(literals[0]);
+      writeVslong(literals[0]);
     } else {
-        writeVulong(literals[0]);
+      writeVulong(literals[0]);
     }
 
     if (option.isFixedDelta) {
-        // if delta is fixed then we don't need to store delta blob
-        writeVslong(option.fixedDelta);
+      // if delta is fixed then we don't need to store delta blob
+      writeVslong(option.fixedDelta);
     } else {
-        // store the first value as delta value using zigzag encoding
-        writeVslong(adjDeltas[0]);
+      // store the first value as delta value using zigzag encoding
+      writeVslong(adjDeltas[0]);
 
-        // adjacent delta values are bit packed. The length of adjDeltas array is
-        // always one less than the number of literals (delta difference for n
-        // elements is n-1). We have already written one element, write the
-        // remaining numLiterals - 2 elements here
-        writeInts(adjDeltas, 1, numLiterals - 2, fb);
+      // adjacent delta values are bit packed. The length of adjDeltas array is
+      // always one less than the number of literals (delta difference for n
+      // elements is n-1). We have already written one element, write the
+      // remaining numLiterals - 2 elements here
+      writeInts(adjDeltas, 1, numLiterals - 2, fb);
     }
-}
+  }
 
-void RleEncoderV2::writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize) {
-  if(input == nullptr || len < 1 || bitSize < 1) {
+  void RleEncoderV2::writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize) {
+    if (input == nullptr || len < 1 || bitSize < 1) {
       return;
-  }
+    }
 
-  if (getClosestAlignedFixedBits(bitSize) == bitSize) {
-    uint32_t numBytes;
-    uint32_t endOffSet = static_cast<uint32_t>(offset + len);
-    if (bitSize < 8 ) {
-      char bitMask = static_cast<char>((1 << bitSize) - 1);
-      uint32_t numHops = 8 / bitSize;
-      uint32_t remainder = static_cast<uint32_t>(len % numHops);
-      uint32_t endUnroll = endOffSet - remainder;
-      for (uint32_t i = offset; i < endUnroll; i+=numHops) {
-        char toWrite = 0;
-        for (uint32_t j = 0; j < numHops; ++j) {
-          toWrite |= static_cast<char>((input[i+j] & bitMask) << (8 - (j + 1) * bitSize));
+    if (getClosestAlignedFixedBits(bitSize) == bitSize) {
+      uint32_t numBytes;
+      uint32_t endOffSet = static_cast<uint32_t>(offset + len);
+      if (bitSize < 8) {
+        char bitMask = static_cast<char>((1 << bitSize) - 1);
+        uint32_t numHops = 8 / bitSize;
+        uint32_t remainder = static_cast<uint32_t>(len % numHops);
+        uint32_t endUnroll = endOffSet - remainder;
+        for (uint32_t i = offset; i < endUnroll; i += numHops) {
+          char toWrite = 0;
+          for (uint32_t j = 0; j < numHops; ++j) {
+            toWrite |= static_cast<char>((input[i + j] & bitMask) << (8 - (j + 1) * bitSize));
+          }
+          writeByte(toWrite);
         }
-        writeByte(toWrite);
-      }
 
-      if (remainder > 0) {
-        uint32_t startShift = 8 - bitSize;
-        char toWrite = 0;
-        for (uint32_t i = endUnroll; i < endOffSet; ++i) {
-          toWrite |= static_cast<char>((input[i] & bitMask) << startShift);
-          startShift -= bitSize;
+        if (remainder > 0) {
+          uint32_t startShift = 8 - bitSize;
+          char toWrite = 0;
+          for (uint32_t i = endUnroll; i < endOffSet; ++i) {
+            toWrite |= static_cast<char>((input[i] & bitMask) << startShift);
+            startShift -= bitSize;
+          }
+          writeByte(toWrite);
         }
-        writeByte(toWrite);
-      }
 
-    } else {
-      numBytes = bitSize / 8;
+      } else {
+        numBytes = bitSize / 8;
 
-      for (uint32_t i = offset; i < endOffSet; ++i) {
-        for (uint32_t j = 0; j < numBytes; ++j) {
-          char toWrite = static_cast<char>((input[i] >> (8 * (numBytes - j - 1))) & 255);
-          writeByte(toWrite);
+        for (uint32_t i = offset; i < endOffSet; ++i) {
+          for (uint32_t j = 0; j < numBytes; ++j) {
+            char toWrite = static_cast<char>((input[i] >> (8 * (numBytes - j - 1))) & 255);
+            writeByte(toWrite);
+          }
         }
       }
-    }
 
-    return;
-  }
+      return;
+    }
 
-  // write for unaligned bit size
-  uint32_t bitsLeft = 8;
-  char current = 0;
-  for(uint32_t i = offset; i < (offset + len); i++) {
-    int64_t value = input[i];
-    uint32_t bitsToWrite = bitSize;
-    while (bitsToWrite > bitsLeft) {
-      // add the bits to the bottom of the current word
-      current |= static_cast<char>(value >> (bitsToWrite - bitsLeft));
-      // subtract out the bits we just added
-      bitsToWrite -= bitsLeft;
-      // zero out the bits above bitsToWrite
-      value &= (static_cast<uint64_t>(1) << bitsToWrite) - 1;
-      writeByte(current);
-      current = 0;
-      bitsLeft = 8;
+    // write for unaligned bit size
+    uint32_t bitsLeft = 8;
+    char current = 0;
+    for (uint32_t i = offset; i < (offset + len); i++) {
+      int64_t value = input[i];
+      uint32_t bitsToWrite = bitSize;
+      while (bitsToWrite > bitsLeft) {
+        // add the bits to the bottom of the current word
+        current |= static_cast<char>(value >> (bitsToWrite - bitsLeft));
+        // subtract out the bits we just added
+        bitsToWrite -= bitsLeft;
+        // zero out the bits above bitsToWrite
+        value &= (static_cast<uint64_t>(1) << bitsToWrite) - 1;
+        writeByte(current);
+        current = 0;
+        bitsLeft = 8;
+      }
+      bitsLeft -= bitsToWrite;
+      current |= static_cast<char>(value << bitsLeft);
+      if (bitsLeft == 0) {
+        writeByte(current);
+        current = 0;
+        bitsLeft = 8;
+      }
     }
-    bitsLeft -= bitsToWrite;
-    current |= static_cast<char>(value << bitsLeft);
-    if (bitsLeft == 0) {
+
+    // flush
+    if (bitsLeft != 8) {
       writeByte(current);
-      current = 0;
-      bitsLeft = 8;
     }
   }
 
-  // flush
-  if (bitsLeft != 8) {
-    writeByte(current);
-  }
-}
-
-void RleEncoderV2::initializeLiterals(int64_t val) {
+  void RleEncoderV2::initializeLiterals(int64_t val) {
     literals[numLiterals++] = val;
     fixedRunLength = 1;
     variableRunLength = 1;
-}
-}
+  }
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/SchemaEvolution.cc b/contrib/libs/apache/orc/c++/src/SchemaEvolution.cc
new file mode 100644
index 0000000000..b8c4fd4048
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/SchemaEvolution.cc
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SchemaEvolution.hh"
+#include "orc/Exceptions.hh"
+
+namespace orc {
+
+  // A null read type means no evolution: ids 0..getMaximumColumnId() all become PPD-safe.
+  SchemaEvolution::SchemaEvolution(const std::shared_ptr<Type>& _readType, const Type* fileType)
+      : readType(_readType) {
+    if (!readType) {
+      const auto maxId = fileType->getMaximumColumnId();
+      for (uint64_t id = 0; id <= maxId; ++id) safePPDConversionMap.insert(id);
+      return;
+    }
+    buildConversion(readType.get(), fileType);
+  }
+
+  const Type* SchemaEvolution::getReadType(const Type& fileType) const {
+    const auto hit = readTypeMap.find(fileType.getColumnId());  // keyed by column id
+    return hit != readTypeMap.cend() ? hit->second : &fileType;  // unmapped: file type itself
+  }
+
+  inline void invalidConversion(const Type* readType, const Type* fileType) {
+    const auto fromTo = fileType->toString() + " to " + readType->toString();
+    throw SchemaEvolutionError("Cannot convert from " + fromTo);  // always throws
+  }
+
+  struct EnumClassHash {  // hash functor: the argument converted to std::size_t
+    template <typename E>
+    std::size_t operator()(E value) const {
+      return static_cast<std::size_t>(value);
+    }
+  };
+
+  bool isNumeric(const Type& type) {  // BOOLEAN, the integer kinds, FLOAT and DOUBLE
+    const auto k = type.getKind();
+    const bool integral = k == BOOLEAN || k == BYTE || k == SHORT || k == INT || k == LONG;
+    return integral || k == FLOAT || k == DOUBLE;
+  }
+
+  bool isStringVariant(const Type& type) {  // STRING, CHAR or VARCHAR
+    const auto k = type.getKind();
+    return k == VARCHAR || k == CHAR || k == STRING;
+  }
+
+  // True only for the DECIMAL kind.
+  bool isDecimal(const Type& type) {
+    return type.getKind() == DECIMAL;
+  }
+
+  bool isTimestamp(const Type& type) {  // TIMESTAMP or TIMESTAMP_INSTANT
+    const auto k = type.getKind();
+    return k == TIMESTAMP_INSTANT || k == TIMESTAMP;
+  }
+
+  struct ConversionCheckResult {  // outcome of checkConversion(readType, fileType)
+    bool isValid;      // false makes buildConversion() throw SchemaEvolutionError
+    bool needConvert;  // true makes buildConversion() map the column to the read type
+  };
+
+  // Checks reading fileType as readType: validity, and whether values need converting.
+  ConversionCheckResult checkConversion(const Type& readType, const Type& fileType) {
+    const auto fileKind = fileType.getKind();
+    if (readType.getKind() == fileKind) {
+      if (fileKind == CHAR || fileKind == VARCHAR) {
+        // same kind, but the declared maximum lengths have to agree
+        return {readType.getMaximumLength() == fileType.getMaximumLength(), false};
+      }
+      if (fileKind == DECIMAL) {
+        // always valid; conversion is needed when precision or scale differ
+        const bool rescale = readType.getPrecision() != fileType.getPrecision() ||
+                             readType.getScale() != fileType.getScale();
+        return {true, rescale};
+      }
+      return {true, false};
+    }
+    switch (fileKind) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE: {
+        // numeric file kinds convert to numeric, string, decimal and timestamp kinds
+        const bool ok = isNumeric(readType) || isStringVariant(readType) ||
+                        isDecimal(readType) || isTimestamp(readType);
+        return {ok, ok};
+      }
+      case DECIMAL: {
+        // decimal file columns convert only to numeric kinds
+        const bool ok = isNumeric(readType);
+        return {ok, ok};
+      }
+      case STRING:
+      case CHAR:
+      case VARCHAR:
+      case TIMESTAMP:
+      case TIMESTAMP_INSTANT:
+      case DATE:
+      case BINARY:
+      case STRUCT:
+      case LIST:
+      case MAP:
+      case UNION:
+      default:
+        // no conversion from these file kinds to a different kind
+        return {false, false};
+    }
+  }
+
+  // Validates reading fileType as _readType, records the type to read the
+  // column as, updates the PPD-safe set, then recurses into the subtypes.
+  // Throws SchemaEvolutionError for a missing file column or invalid pairing.
+  void SchemaEvolution::buildConversion(const Type* _readType, const Type* fileType) {
+    if (fileType == nullptr) {
+      throw SchemaEvolutionError("File does not have " + _readType->toString());
+    }
+
+    const ConversionCheckResult check = checkConversion(*_readType, *fileType);
+    if (!check.isValid) {
+      invalidConversion(_readType, fileType);
+    }
+    const Type* effective = check.needConvert ? _readType : fileType;
+    readTypeMap.emplace(_readType->getColumnId(), effective);
+    buildSafePPDConversionMap(_readType, fileType);
+
+    for (uint64_t idx = 0; idx < _readType->getSubtypeCount(); ++idx) {
+      const auto child = _readType->getSubtype(idx);
+      // a null child is a map/list sub column absent from the file: skip it
+      if (child == nullptr) continue;
+      buildConversion(child, fileType->getTypeByColumnId(child->getColumnId()));
+    }
+  }
+
+  // Reports whether values of this file column must be converted to its
+  // mapped read type; a column that maps to itself never needs conversion.
+  bool SchemaEvolution::needConvert(const Type& fileType) const {
+    const Type* mapped = getReadType(fileType);
+    // validity is not re-checked here: buildConversion() already verified it
+    const bool sameObject = (mapped == &fileType);
+    return !sameObject && checkConversion(*mapped, fileType).needConvert;
+  }
+
+  inline bool isPrimitive(const Type* type) {  // anything but STRUCT/MAP/LIST/UNION
+    const auto k = type->getKind();
+    return !(k == STRUCT || k == MAP || k == LIST || k == UNION);
+  }
+
+  // Marks fileType's column id as PPD-safe when reading it as _readType is an
+  // evolution listed below; null or non-primitive types are ignored. Checks use
+  // the per-column _readType, never the member readType (the top-level read type).
+  void SchemaEvolution::buildSafePPDConversionMap(const Type* _readType, const Type* fileType) {
+    if (_readType == nullptr || !isPrimitive(_readType) || fileType == nullptr ||
+        !isPrimitive(fileType)) {
+      return;
+    }
+
+    const auto readKind = _readType->getKind();
+    bool isSafe = false;
+    if (_readType == fileType) {
+      isSafe = true;  // short cut for same type
+    } else if (readKind == DECIMAL && fileType->getKind() == DECIMAL) {
+      // for decimals alone do equality check to not mess up with precision change
+      isSafe = fileType->getPrecision() == _readType->getPrecision() &&
+               fileType->getScale() == _readType->getScale();
+    } else {
+      // only integer and string evolutions are safe
+      // byte -> short -> int -> long
+      // string <-> varchar
+      // NOTE: Float to double evolution is not safe as floats are stored as
+      // doubles in ORC's internal index, but when doing predicate evaluation
+      // for queries like "select * from orc_float where f = 74.72" the constant
+      // on the filter is converted from string -> double so the precisions will
+      // be different and the comparison will fail.
+      // Soon, we should convert all sargs that compare equality between floats
+      // or doubles to range predicates.
+      // Similarly string -> char and varchar -> char and vice versa is impossible
+      // as ORC stores char with padded spaces in its internal index.
+      switch (fileType->getKind()) {
+        case BYTE: {
+          if (readKind == SHORT || readKind == INT || readKind == LONG) {
+            isSafe = true;
+          }
+          break;
+        }
+        case SHORT: {
+          if (readKind == INT || readKind == LONG) {
+            isSafe = true;
+          }
+          break;
+        }
+        case INT: {
+          if (readKind == LONG) {
+            isSafe = true;
+          }
+          break;
+        }
+        case STRING: {
+          if (readKind == VARCHAR) {
+            isSafe = true;
+          }
+          break;
+        }
+        case VARCHAR: {
+          if (readKind == STRING) {
+            isSafe = true;
+          }
+          break;
+        }
+        case BOOLEAN:
+        case LONG:
+        case FLOAT:
+        case DOUBLE:
+        case BINARY:
+        case TIMESTAMP:
+        case LIST:
+        case MAP:
+        case STRUCT:
+        case UNION:
+        case DECIMAL:
+        case DATE:
+        case CHAR:
+        case TIMESTAMP_INSTANT:
+          break;
+      }
+    }
+
+    if (isSafe) {
+      safePPDConversionMap.insert(fileType->getColumnId());
+    }
+  }
+
+  bool SchemaEvolution::isSafePPDConversion(uint64_t columnId) const {
+    return safePPDConversionMap.count(columnId) != 0;  // membership in the PPD-safe set
+  }
+
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/SchemaEvolution.hh b/contrib/libs/apache/orc/c++/src/SchemaEvolution.hh
new file mode 100644
index 0000000000..ef9020eba4
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/SchemaEvolution.hh
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_SCHEMA_EVOLUTION_HH
+#define ORC_SCHEMA_EVOLUTION_HH
+
+#include "orc/Type.hh"
+
+#include <unordered_map>
+#include <unordered_set>
+
+namespace orc {
+
+  /**
+   * Utility class to compare read type and file type to match their columns
+   * and check type conversion. A null read type means no evolution is applied.
+   */
+  class SchemaEvolution {
+   public:
+    SchemaEvolution(const std::shared_ptr<Type>& readType, const Type* fileType);
+
+    // look up the read type recorded for fileType's column id; returns &fileType
+    // when nothing is recorded (e.g. no read type was given, so no evolution).
+    const Type* getReadType(const Type& fileType) const;
+
+    // check if we need to convert file type to read type for primitive type.
+    bool needConvert(const Type& fileType) const;
+
+    // check if the PPD conversion is safe for the column with this id
+    bool isSafePPDConversion(uint64_t columnId) const;
+
+    // return selected read type (null when none was given to the constructor)
+    const Type* getReadType() const {
+      return readType.get();
+    }
+
+   private:
+    void buildConversion(const Type* readType, const Type* fileType);
+    void buildSafePPDConversionMap(const Type* readType, const Type* fileType);
+
+   private:
+    const std::shared_ptr<Type> readType;                    // requested read type; may be null
+    std::unordered_map<uint64_t, const Type*> readTypeMap;  // column id -> type to read it as
+    std::unordered_set<uint64_t> safePPDConversionMap;      // column ids that are PPD-safe (a set)
+  };
+
+}  // namespace orc
+
+#endif  // ORC_SCHEMA_EVOLUTION_HH
diff --git a/contrib/libs/apache/orc/c++/src/Statistics.cc b/contrib/libs/apache/orc/c++/src/Statistics.cc
index ccc54c291c..8ed29d0e7c 100644
--- a/contrib/libs/apache/orc/c++/src/Statistics.cc
+++ b/contrib/libs/apache/orc/c++/src/Statistics.cc
@@ -1,4 +1,4 @@
- /**
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -16,9 +16,9 @@
  * limitations under the License.
  */
 
-#include "orc/Exceptions.hh"
-#include "RLE.hh"
 #include "Statistics.hh"
+#include "RLE.hh"
+#include "orc/Exceptions.hh"
 
 #include "wrap/coded-stream-wrapper.h"
 
@@ -26,23 +26,23 @@ namespace orc {
 
   ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
                                             const StatContext& statContext) {
-    if (s.has_intstatistics()) {
+    if (s.has_int_statistics()) {
       return new IntegerColumnStatisticsImpl(s);
-    } else if (s.has_doublestatistics()) {
+    } else if (s.has_double_statistics()) {
       return new DoubleColumnStatisticsImpl(s);
-    } else if (s.has_collectionstatistics()) {
+    } else if (s.has_collection_statistics()) {
       return new CollectionColumnStatisticsImpl(s);
-    } else if (s.has_stringstatistics()) {
+    } else if (s.has_string_statistics()) {
       return new StringColumnStatisticsImpl(s, statContext);
-    } else if (s.has_bucketstatistics()) {
+    } else if (s.has_bucket_statistics()) {
       return new BooleanColumnStatisticsImpl(s, statContext);
-    } else if (s.has_decimalstatistics()) {
+    } else if (s.has_decimal_statistics()) {
       return new DecimalColumnStatisticsImpl(s, statContext);
-    } else if (s.has_timestampstatistics()) {
+    } else if (s.has_timestamp_statistics()) {
       return new TimestampColumnStatisticsImpl(s, statContext);
-    } else if (s.has_datestatistics()) {
+    } else if (s.has_date_statistics()) {
       return new DateColumnStatisticsImpl(s, statContext);
-    } else if (s.has_binarystatistics()) {
+    } else if (s.has_binary_statistics()) {
       return new BinaryColumnStatisticsImpl(s, statContext);
     } else {
       return new ColumnStatisticsImpl(s);
@@ -51,24 +51,20 @@ namespace orc {
 
   StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats,
                                  const StatContext& statContext) {
-    for(int i = 0; i < stripeStats.colstats_size(); i++) {
-      colStats.push_back(
-                convertColumnStatistics(stripeStats.colstats(i), statContext));
+    for (int i = 0; i < stripeStats.col_stats_size(); i++) {
+      colStats.push_back(convertColumnStatistics(stripeStats.col_stats(i), statContext));
     }
   }
 
-  StatisticsImpl::StatisticsImpl(const proto::Footer& footer,
-                                 const StatContext& statContext) {
-    for(int i = 0; i < footer.statistics_size(); i++) {
-      colStats.push_back(
-                convertColumnStatistics(footer.statistics(i), statContext));
+  StatisticsImpl::StatisticsImpl(const proto::Footer& footer, const StatContext& statContext) {
+    for (int i = 0; i < footer.statistics_size(); i++) {
+      colStats.push_back(convertColumnStatistics(footer.statistics(i), statContext));
     }
   }
 
   StatisticsImpl::~StatisticsImpl() {
-    for(std::vector<ColumnStatistics*>::iterator ptr = colStats.begin();
-        ptr != colStats.end();
-        ++ptr) {
+    for (std::vector<ColumnStatistics*>::iterator ptr = colStats.begin(); ptr != colStats.end();
+         ++ptr) {
       delete *ptr;
     }
   }
@@ -86,21 +82,19 @@ namespace orc {
   }
 
   StripeStatisticsImpl::StripeStatisticsImpl(
-                const proto::StripeStatistics& stripeStats,
-                std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
-                const StatContext& statContext) {
-    columnStats.reset(new StatisticsImpl(stripeStats, statContext));
+      const proto::StripeStatistics& stripeStats,
+      std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
+      const StatContext& statContext) {
+    columnStats = std::make_unique<StatisticsImpl>(stripeStats, statContext);
     rowIndexStats.resize(indexStats.size());
-    for(size_t i = 0; i < rowIndexStats.size(); i++) {
-      for(size_t j = 0; j < indexStats[i].size(); j++) {
-        rowIndexStats[i].push_back(
-            std::shared_ptr<const ColumnStatistics>(
-                convertColumnStatistics(indexStats[i][j], statContext)));
+    for (size_t i = 0; i < rowIndexStats.size(); i++) {
+      for (size_t j = 0; j < indexStats[i].size(); j++) {
+        rowIndexStats[i].push_back(std::shared_ptr<const ColumnStatistics>(
+            convertColumnStatistics(indexStats[i][j], statContext)));
       }
     }
   }
 
-
   ColumnStatistics::~ColumnStatistics() {
     // PASS
   }
@@ -185,59 +179,57 @@ namespace orc {
     // PASS
   }
 
-  ColumnStatisticsImpl::ColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb) {
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
+  ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
   }
 
-  BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (pb.has_binarystatistics() && statContext.correctStats) {
-      _stats.setHasTotalLength(pb.binarystatistics().has_sum());
-      _stats.setTotalLength(
-          static_cast<uint64_t>(pb.binarystatistics().sum()));
+  BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                         const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (pb.has_binary_statistics() && statContext.correctStats) {
+      _stats.setHasTotalLength(pb.binary_statistics().has_sum());
+      _stats.setTotalLength(static_cast<uint64_t>(pb.binary_statistics().sum()));
     }
   }
 
-  BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (pb.has_bucketstatistics() && statContext.correctStats) {
+  BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                           const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (pb.has_bucket_statistics() && statContext.correctStats) {
       _hasCount = true;
-      _trueCount = pb.bucketstatistics().count(0);
+      _trueCount = pb.bucket_statistics().count(0);
     } else {
       _hasCount = false;
       _trueCount = 0;
     }
   }
 
-  DateColumnStatisticsImpl::DateColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_datestatistics() || !statContext.correctStats) {
+  DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                     const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_date_statistics() || !statContext.correctStats) {
       // hasMinimum_ is false by default;
       // hasMaximum_ is false by default;
       _stats.setMinimum(0);
       _stats.setMaximum(0);
     } else {
-      _stats.setHasMinimum(pb.datestatistics().has_minimum());
-      _stats.setHasMaximum(pb.datestatistics().has_maximum());
-      _stats.setMinimum(pb.datestatistics().minimum());
-      _stats.setMaximum(pb.datestatistics().maximum());
+      _stats.setHasMinimum(pb.date_statistics().has_minimum());
+      _stats.setHasMaximum(pb.date_statistics().has_maximum());
+      _stats.setMinimum(pb.date_statistics().minimum());
+      _stats.setMaximum(pb.date_statistics().maximum());
     }
   }
 
-  DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (pb.has_decimalstatistics() && statContext.correctStats) {
-      const proto::DecimalStatistics& stats = pb.decimalstatistics();
+  DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                           const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (pb.has_decimal_statistics() && statContext.correctStats) {
+      const proto::DecimalStatistics& stats = pb.decimal_statistics();
       _stats.setHasMinimum(stats.has_minimum());
       _stats.setHasMaximum(stats.has_maximum());
       _stats.setHasSum(stats.has_sum());
@@ -248,16 +240,15 @@ namespace orc {
     }
   }
 
-  DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_doublestatistics()) {
+  DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_double_statistics()) {
       _stats.setMinimum(0);
       _stats.setMaximum(0);
       _stats.setSum(0);
-    }else{
-      const proto::DoubleStatistics& stats = pb.doublestatistics();
+    } else {
+      const proto::DoubleStatistics& stats = pb.double_statistics();
       _stats.setHasMinimum(stats.has_minimum());
       _stats.setHasMaximum(stats.has_maximum());
       _stats.setHasSum(stats.has_sum());
@@ -268,16 +259,15 @@ namespace orc {
     }
   }
 
-  IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_intstatistics()) {
+  IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_int_statistics()) {
       _stats.setMinimum(0);
       _stats.setMaximum(0);
       _stats.setSum(0);
-    }else{
-      const proto::IntegerStatistics& stats = pb.intstatistics();
+    } else {
+      const proto::IntegerStatistics& stats = pb.int_statistics();
       _stats.setHasMinimum(stats.has_minimum());
       _stats.setHasMaximum(stats.has_maximum());
       _stats.setHasSum(stats.has_sum());
@@ -288,14 +278,14 @@ namespace orc {
     }
   }
 
-  StringColumnStatisticsImpl::StringColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext){
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_stringstatistics() || !statContext.correctStats) {
+  StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                         const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_string_statistics() || !statContext.correctStats) {
       _stats.setTotalLength(0);
-    }else{
-      const proto::StringStatistics& stats = pb.stringstatistics();
+    } else {
+      const proto::StringStatistics& stats = pb.string_statistics();
       _stats.setHasMinimum(stats.has_minimum());
       _stats.setHasMaximum(stats.has_maximum());
       _stats.setHasTotalLength(stats.has_sum());
@@ -306,46 +296,40 @@ namespace orc {
     }
   }
 
-  TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, const StatContext& statContext) {
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_timestampstatistics() || !statContext.correctStats) {
+  TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb,
+                                                               const StatContext& statContext) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_timestamp_statistics() || !statContext.correctStats) {
       _stats.setMinimum(0);
       _stats.setMaximum(0);
       _lowerBound = 0;
       _upperBound = 0;
       _minimumNanos = DEFAULT_MIN_NANOS;
       _maximumNanos = DEFAULT_MAX_NANOS;
-    }else{
-      const proto::TimestampStatistics& stats = pb.timestampstatistics();
-      _stats.setHasMinimum(
-                stats.has_minimumutc() ||
-                (stats.has_minimum() && (statContext.writerTimezone != nullptr)));
-      _stats.setHasMaximum(
-                stats.has_maximumutc() ||
-                (stats.has_maximum() && (statContext.writerTimezone != nullptr)));
-      _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
-      _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
-      // to be consistent with java side, non-default minimumnanos and maximumnanos
+    } else {
+      const proto::TimestampStatistics& stats = pb.timestamp_statistics();
+      _stats.setHasMinimum(stats.has_minimum_utc() ||
+                           (stats.has_minimum() && (statContext.writerTimezone != nullptr)));
+      _stats.setHasMaximum(stats.has_maximum_utc() ||
+                           (stats.has_maximum() && (statContext.writerTimezone != nullptr)));
+      _hasLowerBound = stats.has_minimum_utc() || stats.has_minimum();
+      _hasUpperBound = stats.has_maximum_utc() || stats.has_maximum();
+      // to be consistent with java side, non-default minimum_nanos and maximum_nanos
       // are added by one in their serialized form.
-      _minimumNanos = stats.has_minimumnanos() ?
-                     stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS;
-      _maximumNanos = stats.has_maximumnanos() ?
-                     stats.maximumnanos() - 1 : DEFAULT_MAX_NANOS;
+      _minimumNanos = stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS;
+      _maximumNanos = stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS;
 
       // Timestamp stats are stored in milliseconds
-      if (stats.has_minimumutc()) {
-        int64_t minimum = stats.minimumutc();
+      if (stats.has_minimum_utc()) {
+        int64_t minimum = stats.minimum_utc();
         _stats.setMinimum(minimum);
         _lowerBound = minimum;
       } else if (statContext.writerTimezone) {
         int64_t writerTimeSec = stats.minimum() / 1000;
         // multiply the offset by 1000 to convert to millisecond
-        int64_t minimum =
-          stats.minimum() +
-            (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
-              * 1000;
+        int64_t minimum = stats.minimum() +
+                          (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
         _stats.setMinimum(minimum);
         _lowerBound = minimum;
       } else {
@@ -356,94 +340,82 @@ namespace orc {
       }
 
       // Timestamp stats are stored in milliseconds
-      if (stats.has_maximumutc()) {
-        int64_t maximum = stats.maximumutc();
+      if (stats.has_maximum_utc()) {
+        int64_t maximum = stats.maximum_utc();
         _stats.setMaximum(maximum);
         _upperBound = maximum;
       } else if (statContext.writerTimezone) {
         int64_t writerTimeSec = stats.maximum() / 1000;
         // multiply the offset by 1000 to convert to millisecond
         int64_t maximum = stats.maximum() +
-          (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
-            * 1000;
+                          (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
         _stats.setMaximum(maximum);
         _upperBound = maximum;
       } else {
         _stats.setMaximum(0);
         // add 1 day 1 hour (25 hours) in milliseconds to handle unknown
         // TZ and daylight savings
-        _upperBound = stats.maximum() +  (25 * SECONDS_PER_HOUR * 1000);
+        _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000);
       }
       // Add 1 millisecond to account for microsecond precision of values
       _upperBound += 1;
     }
   }
 
-  CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb) {
-    _stats.setNumberOfValues(pb.numberofvalues());
-    _stats.setHasNull(pb.hasnull());
-    if (!pb.has_collectionstatistics()) {
+  CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl(
+      const proto::ColumnStatistics& pb) {
+    _stats.setNumberOfValues(pb.number_of_values());
+    _stats.setHasNull(pb.has_null());
+    if (!pb.has_collection_statistics()) {
       _stats.setMinimum(0);
       _stats.setMaximum(0);
       _stats.setSum(0);
     } else {
-      const proto::CollectionStatistics& stats = pb.collectionstatistics();
-      _stats.setHasMinimum(stats.has_minchildren());
-      _stats.setHasMaximum(stats.has_maxchildren());
-      _stats.setHasSum(stats.has_totalchildren());
-
-      _stats.setMinimum(stats.minchildren());
-      _stats.setMaximum(stats.maxchildren());
-      _stats.setSum(stats.totalchildren());
+      const proto::CollectionStatistics& stats = pb.collection_statistics();
+      _stats.setHasMinimum(stats.has_min_children());
+      _stats.setHasMaximum(stats.has_max_children());
+      _stats.setHasSum(stats.has_total_children());
+
+      _stats.setMinimum(stats.min_children());
+      _stats.setMaximum(stats.max_children());
+      _stats.setSum(stats.total_children());
     }
   }
 
-  std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
-    const Type& type) {
+  std::unique_ptr<MutableColumnStatistics> createColumnStatistics(const Type& type) {
     switch (static_cast<int64_t>(type.getKind())) {
       case BOOLEAN:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new BooleanColumnStatisticsImpl());
+        return std::make_unique<BooleanColumnStatisticsImpl>();
       case BYTE:
       case INT:
       case LONG:
       case SHORT:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new IntegerColumnStatisticsImpl());
+        return std::make_unique<IntegerColumnStatisticsImpl>();
       case MAP:
       case LIST:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new CollectionColumnStatisticsImpl());
+        return std::make_unique<CollectionColumnStatisticsImpl>();
       case STRUCT:
       case UNION:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new ColumnStatisticsImpl());
+        return std::make_unique<ColumnStatisticsImpl>();
       case FLOAT:
       case DOUBLE:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new DoubleColumnStatisticsImpl());
+        return std::make_unique<DoubleColumnStatisticsImpl>();
       case BINARY:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new BinaryColumnStatisticsImpl());
+        return std::make_unique<BinaryColumnStatisticsImpl>();
       case STRING:
       case CHAR:
       case VARCHAR:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new StringColumnStatisticsImpl());
+        return std::make_unique<StringColumnStatisticsImpl>();
       case DATE:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new DateColumnStatisticsImpl());
+        return std::make_unique<DateColumnStatisticsImpl>();
       case TIMESTAMP:
       case TIMESTAMP_INSTANT:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new TimestampColumnStatisticsImpl());
+        return std::make_unique<TimestampColumnStatisticsImpl>();
       case DECIMAL:
-        return std::unique_ptr<MutableColumnStatistics>(
-          new DecimalColumnStatisticsImpl());
+        return std::make_unique<DecimalColumnStatisticsImpl>();
       default:
         throw NotImplementedYet("Not supported type: " + type.toString());
     }
   }
 
-}// namespace
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Statistics.hh b/contrib/libs/apache/orc/c++/src/Statistics.hh
index 8cb2283f13..b36e431a7f 100644
--- a/contrib/libs/apache/orc/c++/src/Statistics.hh
+++ b/contrib/libs/apache/orc/c++/src/Statistics.hh
@@ -29,25 +29,25 @@
 
 namespace orc {
 
-/**
- * StatContext contains fields required to compute statistics
- */
+  /**
+   * StatContext contains fields required to compute statistics
+   */
 
   struct StatContext {
     const bool correctStats;
     const Timezone* const writerTimezone;
     StatContext() : correctStats(false), writerTimezone(nullptr) {}
-    StatContext(bool cStat, const Timezone* const timezone = nullptr) :
-        correctStats(cStat), writerTimezone(timezone) {}
+    StatContext(bool cStat, const Timezone* const timezone = nullptr)
+        : correctStats(cStat), writerTimezone(timezone) {}
   };
 
-/**
- * Internal Statistics Implementation
- */
+  /**
+   * Internal Statistics Implementation
+   */
 
   template <typename T>
   class InternalStatisticsImpl {
-  private:
+   private:
     bool _hasNull;
     bool _hasMinimum;
     bool _hasMaximum;
@@ -58,7 +58,8 @@ namespace orc {
     T _minimum;
     T _maximum;
     T _sum;
-  public:
+
+   public:
     InternalStatisticsImpl() {
       _hasNull = false;
       _hasMinimum = false;
@@ -72,52 +73,90 @@ namespace orc {
     ~InternalStatisticsImpl() {}
 
     // GET / SET _totalLength
-    bool hasTotalLength() const { return _hasTotalLength; }
+    bool hasTotalLength() const {
+      return _hasTotalLength;
+    }
 
     void setHasTotalLength(bool hasTotalLength) {
       _hasTotalLength = hasTotalLength;
     }
 
-    uint64_t getTotalLength() const { return _totalLength; }
+    uint64_t getTotalLength() const {
+      return _totalLength;
+    }
 
-    void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; }
+    void setTotalLength(uint64_t totalLength) {
+      _totalLength = totalLength;
+    }
 
     // GET / SET _sum
-    bool hasSum() const { return _hasSum; }
+    bool hasSum() const {
+      return _hasSum;
+    }
 
-    void setHasSum(bool hasSum) { _hasSum = hasSum; }
+    void setHasSum(bool hasSum) {
+      _hasSum = hasSum;
+    }
 
-    T getSum() const { return _sum; }
+    T getSum() const {
+      return _sum;
+    }
 
-    void setSum(T sum) { _sum = sum; }
+    void setSum(T sum) {
+      _sum = sum;
+    }
 
     // GET / SET _maximum
-    bool hasMaximum() const { return _hasMaximum; }
+    bool hasMaximum() const {
+      return _hasMaximum;
+    }
 
-    const T & getMaximum() const { return _maximum; }
+    const T& getMaximum() const {
+      return _maximum;
+    }
 
-    void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; }
+    void setHasMaximum(bool hasMax) {
+      _hasMaximum = hasMax;
+    }
 
-    void setMaximum(T max) { _maximum = max; }
+    void setMaximum(T max) {
+      _maximum = max;
+    }
 
     // GET / SET _minimum
-    bool hasMinimum() const { return _hasMinimum; }
+    bool hasMinimum() const {
+      return _hasMinimum;
+    }
 
-    void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; }
+    void setHasMinimum(bool hasMin) {
+      _hasMinimum = hasMin;
+    }
 
-    const T & getMinimum() const { return _minimum; }
+    const T& getMinimum() const {
+      return _minimum;
+    }
 
-    void setMinimum(T min) { _minimum = min; }
+    void setMinimum(T min) {
+      _minimum = min;
+    }
 
     // GET / SET _valueCount
-    uint64_t getNumberOfValues() const { return _valueCount; }
+    uint64_t getNumberOfValues() const {
+      return _valueCount;
+    }
 
-    void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; }
+    void setNumberOfValues(uint64_t numValues) {
+      _valueCount = numValues;
+    }
 
     // GET / SET _hasNullValue
-    bool hasNull() const { return _hasNull; }
+    bool hasNull() const {
+      return _hasNull;
+    }
 
-    void setHasNull(bool hasNull) { _hasNull = hasNull; }
+    void setHasNull(bool hasNull) {
+      _hasNull = hasNull;
+    }
 
     void reset() {
       _hasNull = false;
@@ -164,7 +203,7 @@ namespace orc {
       _hasTotalLength = _hasTotalLength && other._hasTotalLength;
       _totalLength += other._totalLength;
     }
-   };
+  };
 
   typedef InternalStatisticsImpl<char> InternalCharStatistics;
   typedef InternalStatisticsImpl<char> InternalBooleanStatistics;
@@ -179,7 +218,7 @@ namespace orc {
    * Mutable column statistics for use by the writer.
    */
   class MutableColumnStatistics {
-  public:
+   public:
     virtual ~MutableColumnStatistics();
 
     virtual void increase(uint64_t count) = 0;
@@ -195,16 +234,18 @@ namespace orc {
     virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0;
   };
 
-/**
- * ColumnStatistics Implementation
- */
+  /**
+   * ColumnStatistics Implementation
+   */
 
-  class ColumnStatisticsImpl: public ColumnStatistics,
-			      public MutableColumnStatistics {
-  private:
+  class ColumnStatisticsImpl : public ColumnStatistics, public MutableColumnStatistics {
+   private:
     InternalCharStatistics _stats;
-  public:
-    ColumnStatisticsImpl() { reset(); }
+
+   public:
+    ColumnStatisticsImpl() {
+      reset();
+    }
     ColumnStatisticsImpl(const proto::ColumnStatistics& stats);
     virtual ~ColumnStatisticsImpl() override;
 
@@ -237,25 +278,26 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
     }
 
     std::string toString() const override {
       std::ostringstream buffer;
       buffer << "Column has " << getNumberOfValues() << " values"
-             << " and has null value: " << (hasNull() ? "yes" : "no")
-             << std::endl;
+             << " and has null value: " << (hasNull() ? "yes" : "no") << std::endl;
       return buffer.str();
     }
   };
 
-  class BinaryColumnStatisticsImpl: public BinaryColumnStatistics,
-                                    public MutableColumnStatistics {
-  private:
+  class BinaryColumnStatisticsImpl : public BinaryColumnStatistics, public MutableColumnStatistics {
+   private:
     InternalCharStatistics _stats;
-  public:
-    BinaryColumnStatisticsImpl() { reset(); }
+
+   public:
+    BinaryColumnStatisticsImpl() {
+      reset();
+    }
     BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats,
                                const StatContext& statContext);
     virtual ~BinaryColumnStatisticsImpl() override;
@@ -285,9 +327,9 @@ namespace orc {
     }
 
     uint64_t getTotalLength() const override {
-      if(hasTotalLength()){
+      if (hasTotalLength()) {
         return _stats.getTotalLength();
-      }else{
+      } else {
         throw ParseError("Total length is not defined.");
       }
     }
@@ -303,7 +345,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const BinaryColumnStatisticsImpl& binStats =
-        dynamic_cast<const BinaryColumnStatisticsImpl&>(other);
+          dynamic_cast<const BinaryColumnStatisticsImpl&>(other);
       _stats.merge(binStats._stats);
     }
 
@@ -313,10 +355,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics();
+      proto::BinaryStatistics* binStats = pbStats.mutable_binary_statistics();
       binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
     }
 
@@ -325,24 +367,26 @@ namespace orc {
       buffer << "Data type: Binary" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasTotalLength()){
+      if (hasTotalLength()) {
         buffer << "Total length: " << getTotalLength() << std::endl;
-      }else{
+      } else {
         buffer << "Total length: not defined" << std::endl;
       }
       return buffer.str();
     }
   };
 
-  class BooleanColumnStatisticsImpl: public BooleanColumnStatistics,
-                                     public MutableColumnStatistics {
-  private:
+  class BooleanColumnStatisticsImpl : public BooleanColumnStatistics,
+                                      public MutableColumnStatistics {
+   private:
     InternalBooleanStatistics _stats;
     bool _hasCount;
     uint64_t _trueCount;
 
-  public:
-    BooleanColumnStatisticsImpl() { reset(); }
+   public:
+    BooleanColumnStatisticsImpl() {
+      reset();
+    }
     BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats,
                                 const StatContext& statContext);
     virtual ~BooleanColumnStatisticsImpl() override;
@@ -373,17 +417,17 @@ namespace orc {
     }
 
     uint64_t getFalseCount() const override {
-      if(hasCount()){
+      if (hasCount()) {
         return getNumberOfValues() - _trueCount;
-      }else{
+      } else {
         throw ParseError("False count is not defined.");
       }
     }
 
     uint64_t getTrueCount() const override {
-      if(hasCount()){
+      if (hasCount()) {
         return _trueCount;
-      }else{
+      } else {
         throw ParseError("True count is not defined.");
       }
     }
@@ -401,7 +445,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const BooleanColumnStatisticsImpl& boolStats =
-        dynamic_cast<const BooleanColumnStatisticsImpl&>(other);
+          dynamic_cast<const BooleanColumnStatisticsImpl&>(other);
       _stats.merge(boolStats._stats);
       _hasCount = _hasCount && boolStats._hasCount;
       _trueCount += boolStats._trueCount;
@@ -413,10 +457,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics();
+      proto::BucketStatistics* bucketStats = pbStats.mutable_bucket_statistics();
       if (_hasCount) {
         bucketStats->add_count(_trueCount);
       } else {
@@ -429,9 +473,8 @@ namespace orc {
       buffer << "Data type: Boolean" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasCount()){
-        buffer << "(true: " << getTrueCount() << "; false: "
-               << getFalseCount() << ")" << std::endl;
+      if (hasCount()) {
+        buffer << "(true: " << getTrueCount() << "; false: " << getFalseCount() << ")" << std::endl;
       } else {
         buffer << "(true: not defined; false: not defined)" << std::endl;
         buffer << "True and false counts are not defined" << std::endl;
@@ -440,14 +483,15 @@ namespace orc {
     }
   };
 
-  class DateColumnStatisticsImpl: public DateColumnStatistics,
-                                  public MutableColumnStatistics{
-  private:
+  class DateColumnStatisticsImpl : public DateColumnStatistics, public MutableColumnStatistics {
+   private:
     InternalDateStatistics _stats;
-  public:
-    DateColumnStatisticsImpl() { reset(); }
-    DateColumnStatisticsImpl(const proto::ColumnStatistics& stats,
-                             const StatContext& statContext);
+
+   public:
+    DateColumnStatisticsImpl() {
+      reset();
+    }
+    DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, const StatContext& statContext);
     virtual ~DateColumnStatisticsImpl() override;
 
     bool hasMinimum() const override {
@@ -479,17 +523,17 @@ namespace orc {
     }
 
     int32_t getMinimum() const override {
-      if(hasMinimum()){
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
     int32_t getMaximum() const override {
-      if(hasMaximum()){
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -510,7 +554,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const DateColumnStatisticsImpl& dateStats =
-        dynamic_cast<const DateColumnStatisticsImpl&>(other);
+          dynamic_cast<const DateColumnStatisticsImpl&>(other);
       _stats.merge(dateStats._stats);
     }
 
@@ -519,11 +563,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::DateStatistics* dateStatistics =
-        pbStats.mutable_datestatistics();
+      proto::DateStatistics* dateStatistics = pbStats.mutable_date_statistics();
       if (_stats.hasMinimum()) {
         dateStatistics->set_maximum(_stats.getMaximum());
         dateStatistics->set_minimum(_stats.getMinimum());
@@ -538,28 +581,30 @@ namespace orc {
       buffer << "Data type: Date" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         buffer << "Minimum: " << getMinimum() << std::endl;
-      }else{
+      } else {
         buffer << "Minimum: not defined" << std::endl;
       }
 
-      if(hasMaximum()){
+      if (hasMaximum()) {
         buffer << "Maximum: " << getMaximum() << std::endl;
-      }else{
+      } else {
         buffer << "Maximum: not defined" << std::endl;
       }
       return buffer.str();
     }
   };
 
-  class DecimalColumnStatisticsImpl: public DecimalColumnStatistics,
-                                     public MutableColumnStatistics {
-  private:
+  class DecimalColumnStatisticsImpl : public DecimalColumnStatistics,
+                                      public MutableColumnStatistics {
+   private:
     InternalDecimalStatistics _stats;
 
-  public:
-    DecimalColumnStatisticsImpl() { reset(); }
+   public:
+    DecimalColumnStatisticsImpl() {
+      reset();
+    }
     DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats,
                                 const StatContext& statContext);
     virtual ~DecimalColumnStatisticsImpl() override;
@@ -597,17 +642,17 @@ namespace orc {
     }
 
     Decimal getMinimum() const override {
-      if(hasMinimum()){
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
     Decimal getMaximum() const override {
-      if(hasMaximum()){
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -623,9 +668,9 @@ namespace orc {
     }
 
     Decimal getSum() const override {
-      if(hasSum()){
+      if (hasSum()) {
         return _stats.getSum();
-      }else{
+      } else {
         throw ParseError("Sum is not defined.");
       }
     }
@@ -645,7 +690,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const DecimalColumnStatisticsImpl& decStats =
-        dynamic_cast<const DecimalColumnStatisticsImpl&>(other);
+          dynamic_cast<const DecimalColumnStatisticsImpl&>(other);
 
       _stats.merge(decStats._stats);
 
@@ -661,10 +706,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
+      proto::DecimalStatistics* decStats = pbStats.mutable_decimal_statistics();
       if (_stats.hasMinimum()) {
         decStats->set_minimum(TString(_stats.getMinimum().toString(true)));
         decStats->set_maximum(TString(_stats.getMaximum().toString(true)));
@@ -684,40 +729,36 @@ namespace orc {
       buffer << "Data type: Decimal" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         buffer << "Minimum: " << getMinimum().toString() << std::endl;
-      }else{
+      } else {
         buffer << "Minimum: not defined" << std::endl;
       }
 
-      if(hasMaximum()){
+      if (hasMaximum()) {
         buffer << "Maximum: " << getMaximum().toString() << std::endl;
-      }else{
+      } else {
         buffer << "Maximum: not defined" << std::endl;
       }
 
-      if(hasSum()){
+      if (hasSum()) {
         buffer << "Sum: " << getSum().toString() << std::endl;
-      }else{
+      } else {
         buffer << "Sum: not defined" << std::endl;
       }
 
       return buffer.str();
     }
 
-  private:
+   private:
     void updateSum(Decimal value) {
       if (_stats.hasSum()) {
         bool overflow = false;
         Decimal sum = _stats.getSum();
         if (sum.scale > value.scale) {
-          value.value = scaleUpInt128ByPowerOfTen(value.value,
-                                                  sum.scale - value.scale,
-                                                  overflow);
+          value.value = scaleUpInt128ByPowerOfTen(value.value, sum.scale - value.scale, overflow);
         } else if (sum.scale < value.scale) {
-          sum.value = scaleUpInt128ByPowerOfTen(sum.value,
-                                                value.scale - sum.scale,
-                                                overflow);
+          sum.value = scaleUpInt128ByPowerOfTen(sum.value, value.scale - sum.scale, overflow);
           sum.scale = value.scale;
         }
 
@@ -738,12 +779,14 @@ namespace orc {
     }
   };
 
-  class DoubleColumnStatisticsImpl: public DoubleColumnStatistics,
-                                    public MutableColumnStatistics {
-  private:
+  class DoubleColumnStatisticsImpl : public DoubleColumnStatistics, public MutableColumnStatistics {
+   private:
     InternalDoubleStatistics _stats;
-  public:
-    DoubleColumnStatisticsImpl() { reset(); }
+
+   public:
+    DoubleColumnStatisticsImpl() {
+      reset();
+    }
     DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats);
     virtual ~DoubleColumnStatisticsImpl() override;
 
@@ -780,17 +823,17 @@ namespace orc {
     }
 
     double getMinimum() const override {
-      if(hasMinimum()){
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
     double getMaximum() const override {
-      if(hasMaximum()){
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -806,9 +849,9 @@ namespace orc {
     }
 
     double getSum() const override {
-      if(hasSum()){
+      if (hasSum()) {
         return _stats.getSum();
-      }else{
+      } else {
         throw ParseError("Sum is not defined.");
       }
     }
@@ -825,7 +868,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const DoubleColumnStatisticsImpl& doubleStats =
-        dynamic_cast<const DoubleColumnStatisticsImpl&>(other);
+          dynamic_cast<const DoubleColumnStatisticsImpl&>(other);
       _stats.merge(doubleStats._stats);
 
       _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum());
@@ -840,10 +883,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics();
+      proto::DoubleStatistics* doubleStats = pbStats.mutable_double_statistics();
       if (_stats.hasMinimum()) {
         doubleStats->set_minimum(_stats.getMinimum());
         doubleStats->set_maximum(_stats.getMaximum());
@@ -863,33 +906,36 @@ namespace orc {
       buffer << "Data type: Double" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         buffer << "Minimum: " << getMinimum() << std::endl;
-      }else{
+      } else {
         buffer << "Minimum: not defined" << std::endl;
       }
 
-      if(hasMaximum()){
+      if (hasMaximum()) {
         buffer << "Maximum: " << getMaximum() << std::endl;
-      }else{
+      } else {
         buffer << "Maximum: not defined" << std::endl;
       }
 
-      if(hasSum()){
+      if (hasSum()) {
         buffer << "Sum: " << getSum() << std::endl;
-      }else{
+      } else {
         buffer << "Sum: not defined" << std::endl;
       }
       return buffer.str();
     }
   };
 
-  class IntegerColumnStatisticsImpl: public IntegerColumnStatistics,
-                                     public MutableColumnStatistics {
-  private:
+  class IntegerColumnStatisticsImpl : public IntegerColumnStatistics,
+                                      public MutableColumnStatistics {
+   private:
     InternalIntegerStatistics _stats;
-  public:
-    IntegerColumnStatisticsImpl() { reset(); }
+
+   public:
+    IntegerColumnStatisticsImpl() {
+      reset();
+    }
     IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats);
     virtual ~IntegerColumnStatisticsImpl() override;
 
@@ -926,17 +972,17 @@ namespace orc {
     }
 
     int64_t getMinimum() const override {
-      if(hasMinimum()){
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
     int64_t getMaximum() const override {
-      if(hasMaximum()){
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -952,9 +998,9 @@ namespace orc {
     }
 
     int64_t getSum() const override {
-      if(hasSum()){
+      if (hasSum()) {
         return _stats.getSum();
-      }else{
+      } else {
         throw ParseError("Sum is not defined.");
       }
     }
@@ -984,7 +1030,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const IntegerColumnStatisticsImpl& intStats =
-        dynamic_cast<const IntegerColumnStatisticsImpl&>(other);
+          dynamic_cast<const IntegerColumnStatisticsImpl&>(other);
 
       _stats.merge(intStats._stats);
 
@@ -1005,10 +1051,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics();
+      proto::IntegerStatistics* intStats = pbStats.mutable_int_statistics();
       if (_stats.hasMinimum()) {
         intStats->set_minimum(_stats.getMinimum());
         intStats->set_maximum(_stats.getMaximum());
@@ -1028,33 +1074,32 @@ namespace orc {
       buffer << "Data type: Integer" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         buffer << "Minimum: " << getMinimum() << std::endl;
-      }else{
+      } else {
         buffer << "Minimum: not defined" << std::endl;
       }
 
-      if(hasMaximum()){
+      if (hasMaximum()) {
         buffer << "Maximum: " << getMaximum() << std::endl;
-      }else{
+      } else {
         buffer << "Maximum: not defined" << std::endl;
       }
 
-      if(hasSum()){
+      if (hasSum()) {
         buffer << "Sum: " << getSum() << std::endl;
-      }else{
+      } else {
         buffer << "Sum: not defined" << std::endl;
       }
       return buffer.str();
     }
   };
 
-  class StringColumnStatisticsImpl: public StringColumnStatistics,
-                                    public MutableColumnStatistics{
-  private:
+  class StringColumnStatisticsImpl : public StringColumnStatistics, public MutableColumnStatistics {
+   private:
     InternalStringStatistics _stats;
 
-  public:
+   public:
     StringColumnStatisticsImpl() {
       reset();
     }
@@ -1094,18 +1139,18 @@ namespace orc {
       _stats.setHasNull(hasNull);
     }
 
-    const std::string & getMinimum() const override {
-      if(hasMinimum()){
+    const std::string& getMinimum() const override {
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
-    const std::string & getMaximum() const override {
-      if(hasMaximum()){
+    const std::string& getMaximum() const override {
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -1121,9 +1166,9 @@ namespace orc {
     }
 
     uint64_t getTotalLength() const override {
-      if(hasTotalLength()){
+      if (hasTotalLength()) {
         return _stats.getTotalLength();
-      }else{
+      } else {
         throw ParseError("Total length is not defined.");
       }
     }
@@ -1141,20 +1186,16 @@ namespace orc {
           setMaximum(tempStr);
         } else {
           // update min
-          int minCmp = strncmp(_stats.getMinimum().c_str(),
-                               value,
+          int minCmp = strncmp(_stats.getMinimum().c_str(), value,
                                std::min(_stats.getMinimum().length(), length));
-          if (minCmp > 0 ||
-                (minCmp == 0 && length < _stats.getMinimum().length())) {
+          if (minCmp > 0 || (minCmp == 0 && length < _stats.getMinimum().length())) {
             setMinimum(std::string(value, value + length));
           }
 
           // update max
-          int maxCmp = strncmp(_stats.getMaximum().c_str(),
-                               value,
+          int maxCmp = strncmp(_stats.getMaximum().c_str(), value,
                                std::min(_stats.getMaximum().length(), length));
-          if (maxCmp < 0 ||
-                (maxCmp == 0 && length > _stats.getMaximum().length())) {
+          if (maxCmp < 0 || (maxCmp == 0 && length > _stats.getMaximum().length())) {
             setMaximum(std::string(value, value + length));
           }
         }
@@ -1169,7 +1210,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const StringColumnStatisticsImpl& strStats =
-        dynamic_cast<const StringColumnStatisticsImpl&>(other);
+          dynamic_cast<const StringColumnStatisticsImpl&>(other);
       _stats.merge(strStats._stats);
     }
 
@@ -1179,10 +1220,10 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::StringStatistics* strStats = pbStats.mutable_stringstatistics();
+      proto::StringStatistics* strStats = pbStats.mutable_string_statistics();
       if (_stats.hasMinimum()) {
         strStats->set_minimum(TString(_stats.getMinimum()));
         strStats->set_maximum(TString(_stats.getMaximum()));
@@ -1202,42 +1243,44 @@ namespace orc {
       buffer << "Data type: String" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         buffer << "Minimum: " << getMinimum() << std::endl;
-      }else{
+      } else {
         buffer << "Minimum is not defined" << std::endl;
       }
 
-      if(hasMaximum()){
+      if (hasMaximum()) {
         buffer << "Maximum: " << getMaximum() << std::endl;
-      }else{
+      } else {
         buffer << "Maximum is not defined" << std::endl;
       }
 
-      if(hasTotalLength()){
+      if (hasTotalLength()) {
         buffer << "Total length: " << getTotalLength() << std::endl;
-      }else{
+      } else {
         buffer << "Total length is not defined" << std::endl;
       }
       return buffer.str();
     }
   };
 
-  class TimestampColumnStatisticsImpl: public TimestampColumnStatistics,
-                                       public MutableColumnStatistics {
-  private:
+  class TimestampColumnStatisticsImpl : public TimestampColumnStatistics,
+                                        public MutableColumnStatistics {
+   private:
     InternalIntegerStatistics _stats;
     bool _hasLowerBound;
     bool _hasUpperBound;
     int64_t _lowerBound;
     int64_t _upperBound;
-    int32_t _minimumNanos; // last 6 digits of nanosecond of minimum timestamp
-    int32_t _maximumNanos; // last 6 digits of nanosecond of maximum timestamp
+    int32_t _minimumNanos;  // last 6 digits of nanosecond of minimum timestamp
+    int32_t _maximumNanos;  // last 6 digits of nanosecond of maximum timestamp
     static constexpr int32_t DEFAULT_MIN_NANOS = 0;
     static constexpr int32_t DEFAULT_MAX_NANOS = 999999;
 
-  public:
-    TimestampColumnStatisticsImpl() { reset(); }
+   public:
+    TimestampColumnStatisticsImpl() {
+      reset();
+    }
     TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats,
                                   const StatContext& statContext);
     virtual ~TimestampColumnStatisticsImpl() override;
@@ -1271,17 +1314,17 @@ namespace orc {
     }
 
     int64_t getMinimum() const override {
-      if(hasMinimum()){
+      if (hasMinimum()) {
         return _stats.getMinimum();
-      }else{
+      } else {
         throw ParseError("Minimum is not defined.");
       }
     }
 
     int64_t getMaximum() const override {
-      if(hasMaximum()){
+      if (hasMaximum()) {
         return _stats.getMaximum();
-      }else{
+      } else {
         throw ParseError("Maximum is not defined.");
       }
     }
@@ -1326,7 +1369,7 @@ namespace orc {
 
     void merge(const MutableColumnStatistics& other) override {
       const TimestampColumnStatisticsImpl& tsStats =
-        dynamic_cast<const TimestampColumnStatisticsImpl&>(other);
+          dynamic_cast<const TimestampColumnStatisticsImpl&>(other);
 
       _stats.setHasNull(_stats.hasNull() || tsStats.hasNull());
       _stats.setNumberOfValues(_stats.getNumberOfValues() + tsStats.getNumberOfValues());
@@ -1365,25 +1408,24 @@ namespace orc {
     }
 
     void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::TimestampStatistics* tsStats =
-        pbStats.mutable_timestampstatistics();
+      proto::TimestampStatistics* tsStats = pbStats.mutable_timestamp_statistics();
       if (_stats.hasMinimum()) {
-        tsStats->set_minimumutc(_stats.getMinimum());
-        tsStats->set_maximumutc(_stats.getMaximum());
+        tsStats->set_minimum_utc(_stats.getMinimum());
+        tsStats->set_maximum_utc(_stats.getMaximum());
         if (_minimumNanos != DEFAULT_MIN_NANOS) {
-          tsStats->set_minimumnanos(_minimumNanos + 1);
+          tsStats->set_minimum_nanos(_minimumNanos + 1);
         }
         if (_maximumNanos != DEFAULT_MAX_NANOS) {
-          tsStats->set_maximumnanos(_maximumNanos + 1);
+          tsStats->set_maximum_nanos(_maximumNanos + 1);
         }
       } else {
-        tsStats->clear_minimumutc();
-        tsStats->clear_maximumutc();
-        tsStats->clear_minimumnanos();
-        tsStats->clear_maximumnanos();
+        tsStats->clear_minimum_utc();
+        tsStats->clear_maximum_utc();
+        tsStats->clear_minimum_nanos();
+        tsStats->clear_maximum_nanos();
       }
     }
 
@@ -1396,43 +1438,39 @@ namespace orc {
       buffer << "Data type: Timestamp" << std::endl
              << "Values: " << getNumberOfValues() << std::endl
              << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
-      if(hasMinimum()){
+      if (hasMinimum()) {
         secs = static_cast<time_t>(getMinimum() / 1000);
         gmtime_r(&secs, &tmValue);
         strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
-        buffer << "Minimum: " << timeBuffer << "."
-               << (getMinimum() % 1000) << std::endl;
-      }else{
+        buffer << "Minimum: " << timeBuffer << "." << (getMinimum() % 1000) << std::endl;
+      } else {
         buffer << "Minimum is not defined" << std::endl;
       }
 
-      if(hasLowerBound()){
+      if (hasLowerBound()) {
         secs = static_cast<time_t>(getLowerBound() / 1000);
         gmtime_r(&secs, &tmValue);
         strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
-        buffer << "LowerBound: " << timeBuffer << "."
-               << (getLowerBound() % 1000) << std::endl;
-      }else{
+        buffer << "LowerBound: " << timeBuffer << "." << (getLowerBound() % 1000) << std::endl;
+      } else {
         buffer << "LowerBound is not defined" << std::endl;
       }
 
-      if(hasMaximum()){
-        secs = static_cast<time_t>(getMaximum()/1000);
+      if (hasMaximum()) {
+        secs = static_cast<time_t>(getMaximum() / 1000);
         gmtime_r(&secs, &tmValue);
         strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
-        buffer << "Maximum: " << timeBuffer << "."
-               << (getMaximum() % 1000) << std::endl;
-      }else{
+        buffer << "Maximum: " << timeBuffer << "." << (getMaximum() % 1000) << std::endl;
+      } else {
         buffer << "Maximum is not defined" << std::endl;
       }
 
-      if(hasUpperBound()){
+      if (hasUpperBound()) {
         secs = static_cast<time_t>(getUpperBound() / 1000);
         gmtime_r(&secs, &tmValue);
         strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
-        buffer << "UpperBound: " << timeBuffer << "."
-               << (getUpperBound() % 1000) << std::endl;
-      }else{
+        buffer << "UpperBound: " << timeBuffer << "." << (getUpperBound() % 1000) << std::endl;
+      } else {
         buffer << "UpperBound is not defined" << std::endl;
       }
 
@@ -1448,17 +1486,17 @@ namespace orc {
     }
 
     int64_t getLowerBound() const override {
-      if(hasLowerBound()){
+      if (hasLowerBound()) {
         return _lowerBound;
-      }else{
+      } else {
         throw ParseError("LowerBound is not defined.");
       }
     }
 
     int64_t getUpperBound() const override {
-      if(hasUpperBound()){
+      if (hasUpperBound()) {
         return _upperBound;
-      }else{
+      } else {
         throw ParseError("UpperBound is not defined.");
       }
     }
@@ -1482,12 +1520,14 @@ namespace orc {
 
   class CollectionColumnStatisticsImpl : public CollectionColumnStatistics,
                                          public MutableColumnStatistics {
-  private:
+   private:
     InternalCollectionStatistics _stats;
 
-  public:
-    CollectionColumnStatisticsImpl() { reset(); }
-    CollectionColumnStatisticsImpl(const proto::ColumnStatistics &stats);
+   public:
+    CollectionColumnStatisticsImpl() {
+      reset();
+    }
+    CollectionColumnStatisticsImpl(const proto::ColumnStatistics& stats);
     virtual ~CollectionColumnStatisticsImpl() override;
 
     bool hasMinimumChildren() const override {
@@ -1523,7 +1563,7 @@ namespace orc {
     }
 
     uint64_t getMinimumChildren() const override {
-      if(hasMinimumChildren()) {
+      if (hasMinimumChildren()) {
         return _stats.getMinimum();
       } else {
         throw ParseError("MinimumChildren is not defined.");
@@ -1531,7 +1571,7 @@ namespace orc {
     }
 
     uint64_t getMaximumChildren() const override {
-      if(hasMaximumChildren()) {
+      if (hasMaximumChildren()) {
         return _stats.getMaximum();
       } else {
         throw ParseError("MaximumChildren is not defined.");
@@ -1539,7 +1579,7 @@ namespace orc {
     }
 
     uint64_t getTotalChildren() const override {
-      if(hasTotalChildren()) {
+      if (hasTotalChildren()) {
         return _stats.getSum();
       } else {
         throw ParseError("TotalChildren is not defined.");
@@ -1598,31 +1638,30 @@ namespace orc {
       }
     }
 
-    void toProtoBuf(proto::ColumnStatistics &pbStats) const override {
-      pbStats.set_hasnull(_stats.hasNull());
-      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+    void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+      pbStats.set_has_null(_stats.hasNull());
+      pbStats.set_number_of_values(_stats.getNumberOfValues());
 
-      proto::CollectionStatistics* collectionStats =
-          pbStats.mutable_collectionstatistics();
+      proto::CollectionStatistics* collectionStats = pbStats.mutable_collection_statistics();
       if (_stats.hasMinimum()) {
-        collectionStats->set_minchildren(_stats.getMinimum());
-        collectionStats->set_maxchildren(_stats.getMaximum());
+        collectionStats->set_min_children(_stats.getMinimum());
+        collectionStats->set_max_children(_stats.getMaximum());
       } else {
-        collectionStats->clear_minchildren();
-        collectionStats->clear_maxchildren();
+        collectionStats->clear_min_children();
+        collectionStats->clear_max_children();
       }
       if (_stats.hasSum()) {
-        collectionStats->set_totalchildren(_stats.getSum());
+        collectionStats->set_total_children(_stats.getSum());
       } else {
-        collectionStats->clear_totalchildren();
+        collectionStats->clear_total_children();
       }
     }
 
     std::string toString() const override {
       std::ostringstream buffer;
       buffer << "Data type: Collection(LIST|MAP)" << std::endl
-            << "Values: " << getNumberOfValues() << std::endl
-            << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+             << "Values: " << getNumberOfValues() << std::endl
+             << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
       if (hasMinimumChildren()) {
         buffer << "MinChildren: " << getMinimumChildren() << std::endl;
       } else {
@@ -1647,22 +1686,20 @@ namespace orc {
   ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
                                             const StatContext& statContext);
 
-  class StatisticsImpl: public Statistics {
-  private:
+  class StatisticsImpl : public Statistics {
+   private:
     std::vector<ColumnStatistics*> colStats;
 
     // DELIBERATELY NOT IMPLEMENTED
     StatisticsImpl(const StatisticsImpl&);
     StatisticsImpl& operator=(const StatisticsImpl&);
 
-  public:
-    StatisticsImpl(const proto::StripeStatistics& stripeStats,
-                   const StatContext& statContext);
+   public:
+    StatisticsImpl(const proto::StripeStatistics& stripeStats, const StatContext& statContext);
 
     StatisticsImpl(const proto::Footer& footer, const StatContext& statContext);
 
-    virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId
-                                                        ) const override {
+    virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override {
       return colStats[columnId];
     }
 
@@ -1673,24 +1710,21 @@ namespace orc {
     }
   };
 
-  class StripeStatisticsImpl: public StripeStatistics {
-  private:
+  class StripeStatisticsImpl : public StripeStatistics {
+   private:
     std::unique_ptr<StatisticsImpl> columnStats;
-    std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > >
-                                                                  rowIndexStats;
+    std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > rowIndexStats;
 
     // DELIBERATELY NOT IMPLEMENTED
     StripeStatisticsImpl(const StripeStatisticsImpl&);
     StripeStatisticsImpl& operator=(const StripeStatisticsImpl&);
 
-  public:
-    StripeStatisticsImpl(
-                const proto::StripeStatistics& stripeStats,
-                std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
-                const StatContext& statContext);
+   public:
+    StripeStatisticsImpl(const proto::StripeStatistics& stripeStats,
+                         std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
+                         const StatContext& statContext);
 
-    virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId
-                                                        ) const override {
+    virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override {
       return columnStats->getColumnStatistics(columnId);
     }
 
@@ -1699,8 +1733,7 @@ namespace orc {
     }
 
     virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
-                                                          uint32_t rowIndex
-                                                        ) const override {
+                                                          uint32_t rowIndex) const override {
       // check id indices are valid
       return rowIndexStats[columnId][rowIndex].get();
     }
@@ -1717,9 +1750,8 @@ namespace orc {
    * @param type of column
    * @return MutableColumnStatistics instances
    */
-  std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
-                                                            const Type& type);
+  std::unique_ptr<MutableColumnStatistics> createColumnStatistics(const Type& type);
 
-}// namespace
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/StripeStream.cc b/contrib/libs/apache/orc/c++/src/StripeStream.cc
index 6d6dda8328..8507e95767 100644
--- a/contrib/libs/apache/orc/c++/src/StripeStream.cc
+++ b/contrib/libs/apache/orc/c++/src/StripeStream.cc
@@ -16,10 +16,10 @@
  * limitations under the License.
  */
 
-#include "orc/Exceptions.hh"
+#include "StripeStream.hh"
 #include "RLE.hh"
 #include "Reader.hh"
-#include "StripeStream.hh"
+#include "orc/Exceptions.hh"
 
 #include "wrap/coded-stream-wrapper.h"
 
@@ -27,19 +27,17 @@ namespace orc {
 
   StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader, uint64_t _index,
                                        const proto::StripeInformation& _stripeInfo,
-                                       const proto::StripeFooter& _footer,
-                                       uint64_t _stripeStart,
-                                       InputStream& _input,
-                                       const Timezone& _writerTimezone,
-                                       const Timezone& _readerTimezone
-                                       ): reader(_reader),
-                                          stripeInfo(_stripeInfo),
-                                          footer(_footer),
-                                          stripeIndex(_index),
-                                          stripeStart(_stripeStart),
-                                          input(_input),
-                                          writerTimezone(_writerTimezone),
-                                          readerTimezone(_readerTimezone) {
+                                       const proto::StripeFooter& _footer, uint64_t _stripeStart,
+                                       InputStream& _input, const Timezone& _writerTimezone,
+                                       const Timezone& _readerTimezone)
+      : reader(_reader),
+        stripeInfo(_stripeInfo),
+        footer(_footer),
+        stripeIndex(_index),
+        stripeStart(_stripeStart),
+        input(_input),
+        writerTimezone(_writerTimezone),
+        readerTimezone(_readerTimezone) {
     // PASS
   }
 
@@ -55,7 +53,6 @@ namespace orc {
     // PASS
   }
 
-
   StreamInformationImpl::~StreamInformationImpl() {
     // PASS
   }
@@ -64,8 +61,7 @@ namespace orc {
     return reader.getSelectedColumns();
   }
 
-  proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId
-                                                       ) const {
+  proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId) const {
     return footer.columns(static_cast<int>(columnId));
   }
 
@@ -81,48 +77,46 @@ namespace orc {
     return reader.getFileContents().errorStream;
   }
 
-    std::unique_ptr<SeekableInputStream>
-  StripeStreamsImpl::getStream(uint64_t columnId,
-                               proto::Stream_Kind kind,
-                               bool shouldStream) const {
+  std::unique_ptr<SeekableInputStream> StripeStreamsImpl::getStream(uint64_t columnId,
+                                                                    proto::Stream_Kind kind,
+                                                                    bool shouldStream) const {
     uint64_t offset = stripeStart;
-    uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() + stripeInfo.datalength();
-    MemoryPool *pool = reader.getFileContents().pool;
-    for(int i = 0; i < footer.streams_size(); ++i) {
+    uint64_t dataEnd = stripeInfo.offset() + stripeInfo.index_length() + stripeInfo.data_length();
+    MemoryPool* pool = reader.getFileContents().pool;
+    for (int i = 0; i < footer.streams_size(); ++i) {
       const proto::Stream& stream = footer.streams(i);
-      if (stream.has_kind() &&
-          stream.kind() == kind &&
+      if (stream.has_kind() && stream.kind() == kind &&
           stream.column() == static_cast<uint64_t>(columnId)) {
         uint64_t streamLength = stream.length();
-        uint64_t myBlock = shouldStream ? input.getNaturalReadSize(): streamLength;
+        uint64_t myBlock = shouldStream ? input.getNaturalReadSize() : streamLength;
         if (offset + streamLength > dataEnd) {
           std::stringstream msg;
           msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex
               << ": streamOffset=" << offset << ", streamLength=" << streamLength
-              << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength="
-              << stripeInfo.indexlength() << ", stripeDataLength=" << stripeInfo.datalength();
+              << ", stripeOffset=" << stripeInfo.offset()
+              << ", stripeIndexLength=" << stripeInfo.index_length()
+              << ", stripeDataLength=" << stripeInfo.data_length();
           throw ParseError(msg.str());
         }
         return createDecompressor(reader.getCompression(),
-                                  std::unique_ptr<SeekableInputStream>
-                                  (new SeekableFileInputStream
-                                   (&input,
-                                    offset,
-                                    stream.length(),
-                                    *pool,
-                                    myBlock)),
-                                  reader.getCompressionSize(),
-                                  *pool);
+                                  std::make_unique<SeekableFileInputStream>(
+                                      &input, offset, stream.length(), *pool, myBlock),
+                                  reader.getCompressionSize(), *pool,
+                                  reader.getFileContents().readerMetrics);
       }
       offset += stream.length();
     }
-    return std::unique_ptr<SeekableInputStream>();
+    return nullptr;
   }
 
   MemoryPool& StripeStreamsImpl::getMemoryPool() const {
     return *reader.getFileContents().pool;
   }
 
+  ReaderMetrics* StripeStreamsImpl::getReaderMetrics() const {
+    return reader.getFileContents().readerMetrics;
+  }
+
   bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const {
     return reader.getThrowOnHive11DecimalOverflow();
   }
@@ -135,37 +129,33 @@ namespace orc {
     return reader.getForcedScaleOnHive11Decimal();
   }
 
+  const SchemaEvolution* StripeStreamsImpl::getSchemaEvolution() const {
+    return reader.getSchemaEvolution();
+  }
+
   void StripeInformationImpl::ensureStripeFooterLoaded() const {
     if (stripeFooter.get() == nullptr) {
       std::unique_ptr<SeekableInputStream> pbStream =
-        createDecompressor(compression,
-                           std::unique_ptr<SeekableInputStream>
-                             (new SeekableFileInputStream(stream,
-                                                          offset +
-                                                            indexLength +
-                                                            dataLength,
-                                                          footerLength,
-                                                          memory)),
-                           blockSize,
-                           memory);
-      stripeFooter.reset(new proto::StripeFooter());
+          createDecompressor(compression,
+                             std::make_unique<SeekableFileInputStream>(
+                                 stream, offset + indexLength + dataLength, footerLength, memory),
+                             blockSize, memory, metrics);
+      stripeFooter = std::make_unique<proto::StripeFooter>();
       if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) {
         throw ParseError("Failed to parse the stripe footer");
       }
     }
   }
 
-  std::unique_ptr<StreamInformation>
-     StripeInformationImpl::getStreamInformation(uint64_t streamId) const {
+  std::unique_ptr<StreamInformation> StripeInformationImpl::getStreamInformation(
+      uint64_t streamId) const {
     ensureStripeFooterLoaded();
     uint64_t streamOffset = offset;
-    for(uint64_t s=0; s < streamId; ++s) {
+    for (uint64_t s = 0; s < streamId; ++s) {
       streamOffset += stripeFooter->streams(static_cast<int>(s)).length();
     }
-    return ORC_UNIQUE_PTR<StreamInformation>
-      (new StreamInformationImpl(streamOffset,
-                                 stripeFooter->
-                                   streams(static_cast<int>(streamId))));
+    return std::make_unique<StreamInformationImpl>(
+        streamOffset, stripeFooter->streams(static_cast<int>(streamId)));
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/StripeStream.hh b/contrib/libs/apache/orc/c++/src/StripeStream.hh
index 8d9fb06527..eae6ce0c31 100644
--- a/contrib/libs/apache/orc/c++/src/StripeStream.hh
+++ b/contrib/libs/apache/orc/c++/src/StripeStream.hh
@@ -23,6 +23,7 @@
 #include "orc/OrcFile.hh"
 #include "orc/Reader.hh"
 
+#include "ColumnReader.hh"
 #include "Timezone.hh"
 #include "TypeImpl.hh"
 
@@ -31,11 +32,11 @@ namespace orc {
   class RowReaderImpl;
 
   /**
-  * StripeStream Implementation
-  */
+   * StripeStream Implementation
+   */
 
-  class StripeStreamsImpl: public StripeStreams {
-  private:
+  class StripeStreamsImpl : public StripeStreams {
+   private:
     const RowReaderImpl& reader;
     const proto::StripeInformation& stripeInfo;
     const proto::StripeFooter& footer;
@@ -45,29 +46,26 @@ namespace orc {
     const Timezone& writerTimezone;
     const Timezone& readerTimezone;
 
-  public:
+   public:
     StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index,
-                      const proto::StripeInformation& stripeInfo,
-                      const proto::StripeFooter& footer,
-                      uint64_t stripeStart,
-                      InputStream& input,
-                      const Timezone& writerTimezone,
+                      const proto::StripeInformation& stripeInfo, const proto::StripeFooter& footer,
+                      uint64_t stripeStart, InputStream& input, const Timezone& writerTimezone,
                       const Timezone& readerTimezone);
 
     virtual ~StripeStreamsImpl() override;
 
     virtual const std::vector<bool> getSelectedColumns() const override;
 
-    virtual proto::ColumnEncoding getEncoding(uint64_t columnId
-                                              ) const override;
+    virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const override;
 
-    virtual std::unique_ptr<SeekableInputStream>
-    getStream(uint64_t columnId,
-              proto::Stream_Kind kind,
-              bool shouldStream) const override;
+    virtual std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
+                                                           proto::Stream_Kind kind,
+                                                           bool shouldStream) const override;
 
     MemoryPool& getMemoryPool() const override;
 
+    ReaderMetrics* getReaderMetrics() const override;
+
     const Timezone& getWriterTimezone() const override;
 
     const Timezone& getReaderTimezone() const override;
@@ -79,25 +77,27 @@ namespace orc {
     bool isDecimalAsLong() const override;
 
     int32_t getForcedScaleOnHive11Decimal() const override;
+
+    const SchemaEvolution* getSchemaEvolution() const override;
   };
 
- /**
-  * StreamInformation Implementation
-  */
+  /**
+   * StreamInformation Implementation
+   */
 
-  class StreamInformationImpl: public StreamInformation {
-  private:
+  class StreamInformationImpl : public StreamInformation {
+   private:
     StreamKind kind;
     uint64_t column;
     uint64_t offset;
     uint64_t length;
-  public:
-    StreamInformationImpl(uint64_t _offset,
-                          const proto::Stream& stream
-                          ): kind(static_cast<StreamKind>(stream.kind())),
-                             column(stream.column()),
-                             offset(_offset),
-                             length(stream.length()) {
+
+   public:
+    StreamInformationImpl(uint64_t _offset, const proto::Stream& stream)
+        : kind(static_cast<StreamKind>(stream.kind())),
+          column(stream.column()),
+          offset(_offset),
+          length(stream.length()) {
       // PASS
     }
 
@@ -120,9 +120,9 @@ namespace orc {
     }
   };
 
- /**
- * StripeInformation Implementation
- */
+  /**
+   * StripeInformation Implementation
+   */
 
   class StripeInformationImpl : public StripeInformation {
     uint64_t offset;
@@ -135,27 +135,24 @@ namespace orc {
     CompressionKind compression;
     uint64_t blockSize;
     mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
+    ReaderMetrics* metrics;
     void ensureStripeFooterLoaded() const;
-  public:
-
-    StripeInformationImpl(uint64_t _offset,
-                          uint64_t _indexLength,
-                          uint64_t _dataLength,
-                          uint64_t _footerLength,
-                          uint64_t _numRows,
-                          InputStream* _stream,
-                          MemoryPool& _memory,
-                          CompressionKind _compression,
-                          uint64_t _blockSize
-                          ) : offset(_offset),
-                              indexLength(_indexLength),
-                              dataLength(_dataLength),
-                              footerLength(_footerLength),
-                              numRows(_numRows),
-                              stream(_stream),
-                              memory(_memory),
-                              compression(_compression),
-                              blockSize(_blockSize) {
+
+   public:
+    StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength,
+                          uint64_t _footerLength, uint64_t _numRows, InputStream* _stream,
+                          MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize,
+                          ReaderMetrics* _metrics)
+        : offset(_offset),
+          indexLength(_indexLength),
+          dataLength(_dataLength),
+          footerLength(_footerLength),
+          numRows(_numRows),
+          stream(_stream),
+          memory(_memory),
+          compression(_compression),
+          blockSize(_blockSize),
+          metrics(_metrics) {
       // PASS
     }
 
@@ -174,7 +171,7 @@ namespace orc {
       return indexLength;
     }
 
-    uint64_t getDataLength()const override {
+    uint64_t getDataLength() const override {
       return dataLength;
     }
 
@@ -191,29 +188,25 @@ namespace orc {
       return static_cast<uint64_t>(stripeFooter->streams_size());
     }
 
-    std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId
-                                                            ) const override;
+    std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId) const override;
 
     ColumnEncodingKind getColumnEncoding(uint64_t colId) const override {
       ensureStripeFooterLoaded();
-      return static_cast<ColumnEncodingKind>(stripeFooter->
-                                             columns(static_cast<int>(colId))
-                                             .kind());
+      return static_cast<ColumnEncodingKind>(stripeFooter->columns(static_cast<int>(colId)).kind());
     }
 
     uint64_t getDictionarySize(uint64_t colId) const override {
       ensureStripeFooterLoaded();
-      return static_cast<ColumnEncodingKind>(stripeFooter->
-                                             columns(static_cast<int>(colId))
-                                             .dictionarysize());
+      return static_cast<ColumnEncodingKind>(
+          stripeFooter->columns(static_cast<int>(colId)).dictionary_size());
     }
 
     const std::string& getWriterTimezone() const override {
       ensureStripeFooterLoaded();
-      return stripeFooter->writertimezone();
+      return stripeFooter->writer_timezone();
     }
   };
 
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/Timezone.cc b/contrib/libs/apache/orc/c++/src/Timezone.cc
index 318e5bcc12..27e14480d5 100644
--- a/contrib/libs/apache/orc/c++/src/Timezone.cc
+++ b/contrib/libs/apache/orc/c++/src/Timezone.cc
@@ -16,16 +16,17 @@
  * limitations under the License.
  */
 
-#include "orc/OrcFile.hh"
 #include "Timezone.hh"
+#include "orc/OrcFile.hh"
 
 #include <errno.h>
-#include <map>
-#include <sstream>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <filesystem>
+#include <map>
+#include <sstream>
 
 namespace orc {
 
@@ -35,25 +36,21 @@ namespace orc {
   // location of a symlink to the local timezone
   static const char LOCAL_TIMEZONE[] = "/etc/localtime";
 
-  enum TransitionKind {
-    TRANSITION_JULIAN,
-    TRANSITION_DAY,
-    TRANSITION_MONTH
-  };
+  enum TransitionKind { TRANSITION_JULIAN, TRANSITION_DAY, TRANSITION_MONTH };
 
   static const int64_t MONTHS_PER_YEAR = 12;
   /**
    * The number of days in each month in non-leap and leap years.
    */
-  static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] =
-     {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+  static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] = {
+      {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
       {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};
   static const int64_t DAYS_PER_WEEK = 7;
 
   // Leap years and day of the week repeat every 400 years, which makes it
   // a good cycle length.
   static const int64_t SECONDS_PER_400_YEARS =
-    SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3));
+      SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3));
 
   /**
    * Is the given year a leap year?
@@ -68,7 +65,7 @@ namespace orc {
    * @return -1 if the target < array[0] or array is empty or
    *          i if array[i] <= target and (i == n or array[i] < array[i+1])
    */
-  int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) {
+  int64_t binarySearch(const std::vector<int64_t>& array, int64_t target) {
     uint64_t size = array.size();
     if (size == 0) {
       return -1;
@@ -103,18 +100,17 @@ namespace orc {
     std::string toString() const {
       std::stringstream buffer;
       switch (kind) {
-      case TRANSITION_JULIAN:
-        buffer << "julian " << day;
-        break;
-      case TRANSITION_DAY:
-        buffer << "day " << day;
-        break;
-      case TRANSITION_MONTH:
-        buffer << "month " << month << " week " << week << " day " << day;
-        break;
+        case TRANSITION_JULIAN:
+          buffer << "julian " << day;
+          break;
+        case TRANSITION_DAY:
+          buffer << "day " << day;
+          break;
+        case TRANSITION_MONTH:
+          buffer << "month " << month << " week " << week << " day " << day;
+          break;
       }
-      buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60)
-             << ":" << (time % 60);
+      buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60) << ":" << (time % 60);
       return buffer.str();
     }
 
@@ -127,48 +123,48 @@ namespace orc {
     int64_t getTime(int64_t year) const {
       int64_t result = time;
       switch (kind) {
-      case TRANSITION_JULIAN:
-        result += SECONDS_PER_DAY * day;
-        if (day > 60 && isLeap(year)) {
-          result += SECONDS_PER_DAY;
-        }
-        break;
-      case TRANSITION_DAY:
-        result += SECONDS_PER_DAY * day;
-        break;
-      case TRANSITION_MONTH: {
-        bool inLeap = isLeap(year);
-        int64_t adjustedMonth = (month + 9) % 12 + 1;
-        int64_t adjustedYear = (month <= 2) ? (year - 1) : year;
-        int64_t adjustedCentury = adjustedYear / 100;
-        int64_t adjustedRemainder = adjustedYear % 100;
-
-        // day of the week of the first day of month
-        int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 +
-                             1 + adjustedRemainder + adjustedRemainder / 4 +
-                             adjustedCentury / 4 - 2 * adjustedCentury) % 7;
-        if (dayOfWeek < 0) {
-          dayOfWeek += DAYS_PER_WEEK;
-        }
+        case TRANSITION_JULIAN:
+          result += SECONDS_PER_DAY * day;
+          if (day > 60 && isLeap(year)) {
+            result += SECONDS_PER_DAY;
+          }
+          break;
+        case TRANSITION_DAY:
+          result += SECONDS_PER_DAY * day;
+          break;
+        case TRANSITION_MONTH: {
+          bool inLeap = isLeap(year);
+          int64_t adjustedMonth = (month + 9) % 12 + 1;
+          int64_t adjustedYear = (month <= 2) ? (year - 1) : year;
+          int64_t adjustedCentury = adjustedYear / 100;
+          int64_t adjustedRemainder = adjustedYear % 100;
+
+          // day of the week of the first day of month
+          int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 + 1 + adjustedRemainder +
+                               adjustedRemainder / 4 + adjustedCentury / 4 - 2 * adjustedCentury) %
+                              7;
+          if (dayOfWeek < 0) {
+            dayOfWeek += DAYS_PER_WEEK;
+          }
 
-        int64_t d = day - dayOfWeek;
-        if (d < 0) {
-          d += DAYS_PER_WEEK;
-        }
-        for (int w = 1; w < week; ++w) {
-          if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) {
-            break;
+          int64_t d = day - dayOfWeek;
+          if (d < 0) {
+            d += DAYS_PER_WEEK;
           }
-          d += DAYS_PER_WEEK;
-        }
-        result += d * SECONDS_PER_DAY;
+          for (int w = 1; w < week; ++w) {
+            if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) {
+              break;
+            }
+            d += DAYS_PER_WEEK;
+          }
+          result += d * SECONDS_PER_DAY;
 
-        // Add in the time for the month
-        for(int m=0; m < month - 1; ++m) {
-          result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY;
+          // Add in the time for the month
+          for (int m = 0; m < month - 1; ++m) {
+            result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY;
+          }
+          break;
         }
-        break;
-      }
       }
       return result;
     }
@@ -187,7 +183,7 @@ namespace orc {
    * daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)?
    * day = J<day without 2/29>|<day with 2/29>|M<month>.<week>.<day of week>
    */
-  class FutureRuleImpl: public FutureRule {
+  class FutureRuleImpl : public FutureRule {
     std::string ruleString;
     TimezoneVariant standard;
     bool hasDst;
@@ -215,17 +211,17 @@ namespace orc {
         offsets.resize(400 * 2 + 1);
         startInStd = start.getTime(1970) < end.getTime(1970);
         int64_t base = 0;
-        for(int64_t year = 1970; year < 1970 + 400; ++year) {
+        for (int64_t year = 1970; year < 1970 + 400; ++year) {
           if (startInStd) {
             offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] =
-              base + start.getTime(year) - standard.gmtOffset;
+                base + start.getTime(year) - standard.gmtOffset;
             offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] =
-              base + end.getTime(year) - dst.gmtOffset;
+                base + end.getTime(year) - dst.gmtOffset;
           } else {
             offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] =
-              base + end.getTime(year) - dst.gmtOffset;
+                base + end.getTime(year) - dst.gmtOffset;
             offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] =
-              base + start.getTime(year) - standard.gmtOffset;
+                base + start.getTime(year) - standard.gmtOffset;
           }
           base += (isLeap(year) ? 366 : 365) * SECONDS_PER_DAY;
         }
@@ -233,7 +229,7 @@ namespace orc {
       offsets[0] = 0;
     }
 
-  public:
+   public:
     virtual ~FutureRuleImpl() override;
     bool isDefined() const override;
     const TimezoneVariant& getVariant(int64_t clk) const override;
@@ -287,13 +283,9 @@ namespace orc {
    * A parser for the future rule strings.
    */
   class FutureRuleParser {
-  public:
-    FutureRuleParser(const std::string& str,
-                     FutureRuleImpl* rule
-                     ): ruleString(str),
-                        length(str.size()),
-                        position(0),
-                        output(*rule) {
+   public:
+    FutureRuleParser(const std::string& str, FutureRuleImpl* rule)
+        : ruleString(str), length(str.size()), position(0), output(*rule) {
       output.ruleString = str;
       if (position != length) {
         parseName(output.standard.name);
@@ -318,14 +310,13 @@ namespace orc {
       }
     }
 
-  private:
-
+   private:
     const std::string& ruleString;
     size_t length;
     size_t position;
-    FutureRuleImpl &output;
+    FutureRuleImpl& output;
 
-    void throwError(const char *msg) {
+    void throwError(const char* msg) {
       std::stringstream buffer;
       buffer << msg << " at " << position << " in '" << ruleString << "'";
       throw TimezoneError(buffer.str());
@@ -348,7 +339,7 @@ namespace orc {
         if (position == length) {
           throwError("missing close '>'");
         }
-        position +=1;
+        position += 1;
       } else {
         while (position < length) {
           char ch = ruleString[position];
@@ -456,9 +447,8 @@ namespace orc {
    * Parse the POSIX TZ string.
    */
   std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) {
-    std::shared_ptr<FutureRule> result(new FutureRuleImpl());
-    FutureRuleParser parser(ruleString,
-                            dynamic_cast<FutureRuleImpl*>(result.get()));
+    auto result = std::make_shared<FutureRuleImpl>();
+    FutureRuleParser parser(ruleString, dynamic_cast<FutureRuleImpl*>(result.get()));
     return result;
   }
 
@@ -475,7 +465,7 @@ namespace orc {
    * An abstraction of the differences between versions.
    */
   class VersionParser {
-  public:
+   public:
     virtual ~VersionParser();
 
     /**
@@ -496,8 +486,7 @@ namespace orc {
     /**
      * Parse the future string
      */
-    virtual std::string parseFutureString(const unsigned char *ptr,
-                                          uint64_t offset,
+    virtual std::string parseFutureString(const unsigned char* ptr, uint64_t offset,
                                           uint64_t length) const = 0;
   };
 
@@ -506,14 +495,12 @@ namespace orc {
   }
 
   static uint32_t decode32(const unsigned char* ptr) {
-    return static_cast<uint32_t>(ptr[0] << 24) |
-      static_cast<uint32_t>(ptr[1] << 16) |
-      static_cast<uint32_t>(ptr[2] << 8) |
-      static_cast<uint32_t>(ptr[3]);
+    return static_cast<uint32_t>(ptr[0] << 24) | static_cast<uint32_t>(ptr[1] << 16) |
+           static_cast<uint32_t>(ptr[2] << 8) | static_cast<uint32_t>(ptr[3]);
   }
 
-  class Version1Parser: public VersionParser {
-  public:
+  class Version1Parser : public VersionParser {
+   public:
     virtual ~Version1Parser() override;
 
     virtual uint64_t getVersion() const override {
@@ -535,9 +522,7 @@ namespace orc {
       return static_cast<int32_t>(decode32(ptr));
     }
 
-    virtual std::string parseFutureString(const unsigned char *,
-                                          uint64_t,
-                                          uint64_t) const override {
+    virtual std::string parseFutureString(const unsigned char*, uint64_t, uint64_t) const override {
       return "";
     }
   };
@@ -546,8 +531,8 @@ namespace orc {
     // PASS
   }
 
-  class Version2Parser: public VersionParser {
-  public:
+  class Version2Parser : public VersionParser {
+   public:
     virtual ~Version2Parser() override;
 
     virtual uint64_t getVersion() const override {
@@ -568,11 +553,9 @@ namespace orc {
       return static_cast<int64_t>(decode32(ptr)) << 32 | decode32(ptr + 4);
     }
 
-    virtual std::string parseFutureString(const unsigned char *ptr,
-                                          uint64_t offset,
+    virtual std::string parseFutureString(const unsigned char* ptr, uint64_t offset,
                                           uint64_t length) const override {
-      return std::string(reinterpret_cast<const char*>(ptr) + offset + 1,
-                         length - 2);
+      return std::string(reinterpret_cast<const char*>(ptr) + offset + 1, length - 2);
     }
   };
 
@@ -580,10 +563,9 @@ namespace orc {
     // PASS
   }
 
-  class TimezoneImpl: public Timezone {
-  public:
-    TimezoneImpl(const std::string& name,
-                 const std::vector<unsigned char> bytes);
+  class TimezoneImpl : public Timezone {
+   public:
+    TimezoneImpl(const std::string& _filename, const std::vector<unsigned char>& buffer);
     virtual ~TimezoneImpl() override;
 
     /**
@@ -605,15 +587,16 @@ namespace orc {
       return clk + getVariant(clk).gmtOffset;
     }
 
-  private:
-    void parseTimeVariants(const unsigned char* ptr,
-                           uint64_t variantOffset,
-                           uint64_t variantCount,
-                           uint64_t nameOffset,
-                           uint64_t nameCount);
-    void parseZoneFile(const unsigned char* ptr,
-                       uint64_t sectionOffset,
-                       uint64_t fileLength,
+    int64_t convertFromUTC(int64_t clk) const override {
+      int64_t adjustedTime = clk - getVariant(clk).gmtOffset;
+      const auto& adjustedReader = getVariant(adjustedTime);
+      return clk - adjustedReader.gmtOffset;
+    }
+
+   private:
+    void parseTimeVariants(const unsigned char* ptr, uint64_t variantOffset, uint64_t variantCount,
+                           uint64_t nameOffset, uint64_t nameCount);
+    void parseZoneFile(const unsigned char* ptr, uint64_t sectionOffset, uint64_t fileLength,
                        const VersionParser& version);
     // filename
     std::string filename;
@@ -644,10 +627,10 @@ namespace orc {
   };
 
   DIAGNOSTIC_PUSH
-  #ifdef __clang__
-    DIAGNOSTIC_IGNORE("-Wglobal-constructors")
-    DIAGNOSTIC_IGNORE("-Wexit-time-destructors")
-  #endif
+#ifdef __clang__
+  DIAGNOSTIC_IGNORE("-Wglobal-constructors")
+  DIAGNOSTIC_IGNORE("-Wexit-time-destructors")
+#endif
   static std::mutex timezone_mutex;
   static std::map<std::string, std::shared_ptr<Timezone> > timezoneCache;
   DIAGNOSTIC_POP
@@ -656,9 +639,8 @@ namespace orc {
     // PASS
   }
 
-  TimezoneImpl::TimezoneImpl(const std::string& _filename,
-                             const std::vector<unsigned char> buffer
-                             ): filename(_filename) {
+  TimezoneImpl::TimezoneImpl(const std::string& _filename, const std::vector<unsigned char>& buffer)
+      : filename(_filename) {
     parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser());
     // Build the literal for the ORC epoch
     // 2015 Jan 1 00:00:00
@@ -675,7 +657,7 @@ namespace orc {
   }
 
   const char* getTimezoneDirectory() {
-    const char *dir = getenv("TZDIR");
+    const char* dir = getenv("TZDIR");
     if (!dir) {
       dir = DEFAULT_TZDIR;
     }
@@ -689,18 +671,23 @@ namespace orc {
   const Timezone& getTimezoneByFilename(const std::string& filename) {
     // ORC-110
     std::lock_guard<std::mutex> timezone_lock(timezone_mutex);
-    std::map<std::string, std::shared_ptr<Timezone> >::iterator itr =
-      timezoneCache.find(filename);
+    std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename);
     if (itr != timezoneCache.end()) {
       return *(itr->second).get();
     }
+    if (!std::filesystem::exists(std::filesystem::path(filename))) {
+      std::stringstream ss;
+      ss << "Time zone file " << filename << " does not exist."
+         << " Please install IANA time zone database and set TZDIR env.";
+      throw TimezoneError(ss.str());
+    }
     try {
-      ORC_UNIQUE_PTR<InputStream> file = readFile(filename);
+      std::unique_ptr<InputStream> file = readFile(filename);
       size_t size = static_cast<size_t>(file->getLength());
       std::vector<unsigned char> buffer(size);
       file->read(&buffer[0], size, 0);
-      timezoneCache[filename] = std::shared_ptr<Timezone>(new TimezoneImpl(filename, buffer));
-    } catch(ParseError& err) {
+      timezoneCache[filename] = std::make_shared<TimezoneImpl>(filename, buffer);
+    } catch (ParseError& err) {
       throw TimezoneError(err.what());
     }
     return *timezoneCache[filename].get();
@@ -732,32 +719,30 @@ namespace orc {
    * Parse a set of bytes as a timezone file as if they came from filename.
    */
   std::unique_ptr<Timezone> getTimezone(const std::string& filename,
-                                        const std::vector<unsigned char>& b){
-    return std::unique_ptr<Timezone>(new TimezoneImpl(filename, b));
+                                        const std::vector<unsigned char>& b) {
+    return std::make_unique<TimezoneImpl>(filename, b);
   }
 
   TimezoneImpl::~TimezoneImpl() {
     // PASS
   }
 
-  void TimezoneImpl::parseTimeVariants(const unsigned char* ptr,
-                                       uint64_t variantOffset,
-                                       uint64_t variantCount,
-                                       uint64_t nameOffset,
+  void TimezoneImpl::parseTimeVariants(const unsigned char* ptr, uint64_t variantOffset,
+                                       uint64_t variantCount, uint64_t nameOffset,
                                        uint64_t nameCount) {
-    for(uint64_t variant=0; variant < variantCount; ++variant) {
+    for (uint64_t variant = 0; variant < variantCount; ++variant) {
       variants[variant].gmtOffset =
-        static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant));
+          static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant));
       variants[variant].isDst = ptr[variantOffset + 6 * variant + 4] != 0;
       uint64_t nameStart = ptr[variantOffset + 6 * variant + 5];
       if (nameStart >= nameCount) {
         std::stringstream buffer;
-        buffer << "name out of range in variant " << variant
-               << " - " << nameStart << " >= " << nameCount;
+        buffer << "name out of range in variant " << variant << " - " << nameStart
+               << " >= " << nameCount;
         throw TimezoneError(buffer.str());
       }
-      variants[variant].name = std::string(reinterpret_cast<const char*>(ptr)
-                                           + nameOffset + nameStart);
+      variants[variant].name =
+          std::string(reinterpret_cast<const char*>(ptr) + nameOffset + nameStart);
     }
   }
 
@@ -787,17 +772,14 @@ namespace orc {
    *   IsGmt
    *   FutureString
    */
-  void TimezoneImpl::parseZoneFile(const unsigned char *ptr,
-                                   uint64_t sectionOffset,
-                                   uint64_t fileLength,
-                                   const VersionParser& versionParser) {
-    const uint64_t magicOffset  = sectionOffset + 0;
+  void TimezoneImpl::parseZoneFile(const unsigned char* ptr, uint64_t sectionOffset,
+                                   uint64_t fileLength, const VersionParser& versionParser) {
+    const uint64_t magicOffset = sectionOffset + 0;
     const uint64_t headerOffset = magicOffset + 20;
 
     // check for validity before we start parsing
     if (fileLength < headerOffset + 6 * 4 ||
-        strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", 4)
-          != 0) {
+        strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", 4) != 0) {
       std::stringstream buffer;
       buffer << "non-tzfile " << filename;
       throw TimezoneError(buffer.str());
@@ -805,24 +787,23 @@ namespace orc {
 
     const uint64_t isGmtCount = decode32(ptr + headerOffset + 0);
     const uint64_t isStdCount = decode32(ptr + headerOffset + 4);
-    const uint64_t leapCount  = decode32(ptr + headerOffset + 8);
-    const uint64_t timeCount  = decode32(ptr + headerOffset + 12);
-    const uint64_t variantCount  = decode32(ptr + headerOffset + 16);
-    const uint64_t nameCount  = decode32(ptr + headerOffset + 20);
+    const uint64_t leapCount = decode32(ptr + headerOffset + 8);
+    const uint64_t timeCount = decode32(ptr + headerOffset + 12);
+    const uint64_t variantCount = decode32(ptr + headerOffset + 16);
+    const uint64_t nameCount = decode32(ptr + headerOffset + 20);
 
     const uint64_t timeOffset = headerOffset + 24;
-    const uint64_t timeVariantOffset =
-      timeOffset + versionParser.getTimeSize() * timeCount;
+    const uint64_t timeVariantOffset = timeOffset + versionParser.getTimeSize() * timeCount;
     const uint64_t variantOffset = timeVariantOffset + timeCount;
     const uint64_t nameOffset = variantOffset + variantCount * 6;
-    const uint64_t sectionLength = nameOffset + nameCount
-      + (versionParser.getTimeSize() + 4) * leapCount
-      + isGmtCount + isStdCount;
+    const uint64_t sectionLength = nameOffset + nameCount +
+                                   (versionParser.getTimeSize() + 4) * leapCount + isGmtCount +
+                                   isStdCount;
 
     if (sectionLength > fileLength) {
       std::stringstream buffer;
-      buffer << "tzfile too short " << filename
-             << " needs " << sectionLength << " and has " << fileLength;
+      buffer << "tzfile too short " << filename << " needs " << sectionLength << " and has "
+             << fileLength;
       throw TimezoneError(buffer.str());
     }
 
@@ -835,24 +816,19 @@ namespace orc {
     variants.resize(variantCount);
     transitions.resize(timeCount);
     currentVariant.resize(timeCount);
-    parseTimeVariants(ptr, variantOffset, variantCount, nameOffset,
-                      nameCount);
+    parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, nameCount);
     bool foundAncient = false;
-    for(uint64_t t=0; t < timeCount; ++t) {
-      transitions[t] =
-        versionParser.parseTime(ptr + timeOffset +
-                                t * versionParser.getTimeSize());
+    for (uint64_t t = 0; t < timeCount; ++t) {
+      transitions[t] = versionParser.parseTime(ptr + timeOffset + t * versionParser.getTimeSize());
       currentVariant[t] = ptr[timeVariantOffset + t];
       if (currentVariant[t] >= variantCount) {
         std::stringstream buffer;
-        buffer << "tzfile rule out of range " << filename
-               << " references rule " << currentVariant[t]
-               << " of " << variantCount;
+        buffer << "tzfile rule out of range " << filename << " references rule "
+               << currentVariant[t] << " of " << variantCount;
         throw TimezoneError(buffer.str());
       }
       // find the oldest standard time and use that as the ancient value
-      if (!foundAncient &&
-          !variants[currentVariant[t]].isDst) {
+      if (!foundAncient && !variants[currentVariant[t]].isDst) {
         foundAncient = true;
         ancientVariant = currentVariant[t];
       }
@@ -860,9 +836,8 @@ namespace orc {
     if (!foundAncient) {
       ancientVariant = 0;
     }
-    futureRule = parseFutureRule(versionParser.parseFutureString
-                                   (ptr, sectionLength,
-                                    fileLength - sectionLength));
+    futureRule = parseFutureRule(
+        versionParser.parseFutureString(ptr, sectionLength, fileLength - sectionLength));
 
     // find the lower bound for applying the future rule
     if (futureRule->isDefined()) {
@@ -897,11 +872,10 @@ namespace orc {
     out << "Timezone file: " << filename << "\n";
     out << "  Version: " << version << "\n";
     futureRule->print(out);
-    for(uint64_t r=0; r < variants.size(); ++r) {
-      out <<  "  Variant " << r << ": "
-          << variants[r].toString() << "\n";
+    for (uint64_t r = 0; r < variants.size(); ++r) {
+      out << "  Variant " << r << ": " << variants[r].toString() << "\n";
     }
-    for(uint64_t t=0; t < transitions.size(); ++t) {
+    for (uint64_t t = 0; t < transitions.size(); ++t) {
       tm timeStruct;
       tm* result = nullptr;
       char buffer[25];
@@ -912,25 +886,21 @@ namespace orc {
           strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct);
         }
       }
-      std::cout << "  Transition: " << (result == nullptr ? "null" : buffer)
-                << " (" << transitions[t] << ") -> "
-                << variants[currentVariant[t]].name
-                << "\n";
+      out << "  Transition: " << (result == nullptr ? "null" : buffer) << " (" << transitions[t]
+          << ") -> " << variants[currentVariant[t]].name << "\n";
     }
   }
 
-  TimezoneError::TimezoneError(const std::string& what
-                               ): std::runtime_error(what) {
+  TimezoneError::TimezoneError(const std::string& what) : std::runtime_error(what) {
     // PASS
   }
 
-  TimezoneError::TimezoneError(const TimezoneError& other
-                               ): std::runtime_error(other) {
+  TimezoneError::TimezoneError(const TimezoneError& other) : std::runtime_error(other) {
     // PASS
   }
 
-  TimezoneError::~TimezoneError() ORC_NOEXCEPT {
+  TimezoneError::~TimezoneError() noexcept {
     // PASS
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Timezone.hh b/contrib/libs/apache/orc/c++/src/Timezone.hh
index 6c8b861259..0716c5a3f2 100644
--- a/contrib/libs/apache/orc/c++/src/Timezone.hh
+++ b/contrib/libs/apache/orc/c++/src/Timezone.hh
@@ -23,9 +23,9 @@
 
 #include "Adaptor.hh"
 
+#include <stdint.h>
 #include <memory>
 #include <stdexcept>
-#include <stdint.h>
 #include <string>
 #include <vector>
 
@@ -55,7 +55,7 @@ namespace orc {
    * city in the region (eg. America/Los_Angeles or America/Mexico_City).
    */
   class Timezone {
-  public:
+   public:
     virtual ~Timezone();
 
     /**
@@ -79,12 +79,17 @@ namespace orc {
     /**
      * Get the version of the zone file.
      */
-    virtual uint64_t getVersion() const =0;
+    virtual uint64_t getVersion() const = 0;
 
     /**
      * Convert wall clock time of current timezone to UTC timezone
      */
     virtual int64_t convertToUTC(int64_t clk) const = 0;
+
+    /**
+     * Convert UTC timezone to wall clock time of current timezone
+     */
+    virtual int64_t convertFromUTC(int64_t clk) const = 0;
   };
 
   /**
@@ -105,11 +110,11 @@ namespace orc {
   std::unique_ptr<Timezone> getTimezone(const std::string& filename,
                                         const std::vector<unsigned char>& b);
 
-  class TimezoneError: public std::runtime_error {
-  public:
-    TimezoneError(const std::string& what);
-    TimezoneError(const TimezoneError&);
-    virtual ~TimezoneError() ORC_NOEXCEPT;
+  class TimezoneError : public std::runtime_error {
+   public:
+    explicit TimezoneError(const std::string& what);
+    explicit TimezoneError(const TimezoneError&);
+    ~TimezoneError() noexcept override;
   };
 
   /**
@@ -118,7 +123,7 @@ namespace orc {
    * the future.
    */
   class FutureRule {
-  public:
+   public:
     virtual ~FutureRule();
     virtual bool isDefined() const = 0;
     virtual const TimezoneVariant& getVariant(int64_t clk) const = 0;
@@ -129,6 +134,6 @@ namespace orc {
    * Parse the POSIX TZ string.
    */
   std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString);
-}
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/TypeImpl.cc b/contrib/libs/apache/orc/c++/src/TypeImpl.cc
index 14517ce164..c427a962b5 100644
--- a/contrib/libs/apache/orc/c++/src/TypeImpl.cc
+++ b/contrib/libs/apache/orc/c++/src/TypeImpl.cc
@@ -16,9 +16,9 @@
  * limitations under the License.
  */
 
+#include "TypeImpl.hh"
 #include "Adaptor.hh"
 #include "orc/Exceptions.hh"
-#include "TypeImpl.hh"
 
 #include <iostream>
 #include <sstream>
@@ -51,8 +51,7 @@ namespace orc {
     subtypeCount = 0;
   }
 
-  TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision,
-                     uint64_t _scale) {
+  TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision, uint64_t _scale) {
     parent = nullptr;
     columnId = -1;
     maximumColumnId = -1;
@@ -66,7 +65,7 @@ namespace orc {
   uint64_t TypeImpl::assignIds(uint64_t root) const {
     columnId = static_cast<int64_t>(root);
     uint64_t current = root + 1;
-    for(uint64_t i=0; i < subtypeCount; ++i) {
+    for (uint64_t i = 0; i < subtypeCount; ++i) {
       current = dynamic_cast<TypeImpl*>(subTypes[i].get())->assignIds(current);
     }
     maximumColumnId = static_cast<int64_t>(current) - 1;
@@ -121,8 +120,7 @@ namespace orc {
     return scale;
   }
 
-  Type& TypeImpl::setAttribute(const std::string& key,
-                     const std::string& value) {
+  Type& TypeImpl::setAttribute(const std::string& key, const std::string& value) {
     attributes[key] = value;
     return *this;
   }
@@ -171,8 +169,7 @@ namespace orc {
     subtypeCount += 1;
   }
 
-  Type* TypeImpl::addStructField(const std::string& fieldName,
-                                 std::unique_ptr<Type> fieldType) {
+  Type* TypeImpl::addStructField(const std::string& fieldName, std::unique_ptr<Type> fieldType) {
     addChildType(std::move(fieldType));
     fieldNames.push_back(fieldName);
     return this;
@@ -184,299 +181,301 @@ namespace orc {
   }
 
   bool isUnquotedFieldName(std::string fieldName) {
-    for (auto &ch : fieldName) {
-        if (!isalnum(ch) && ch != '_') {
-          return false;
-        }
+    for (auto& ch : fieldName) {
+      if (!isalnum(ch) && ch != '_') {
+        return false;
+      }
     }
     return true;
   }
 
   std::string TypeImpl::toString() const {
     switch (static_cast<int64_t>(kind)) {
-    case BOOLEAN:
-      return "boolean";
-    case BYTE:
-      return "tinyint";
-    case SHORT:
-      return "smallint";
-    case INT:
-      return "int";
-    case LONG:
-      return "bigint";
-    case FLOAT:
-      return "float";
-    case DOUBLE:
-      return "double";
-    case STRING:
-      return "string";
-    case BINARY:
-      return "binary";
-    case TIMESTAMP:
-      return "timestamp";
-    case TIMESTAMP_INSTANT:
-      return "timestamp with local time zone";
-    case LIST:
-      return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">";
-    case MAP:
-      return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," +
-        (subTypes[1] ? subTypes[1]->toString() : "void") +  ">";
-    case STRUCT: {
-      std::string result = "struct<";
-      for(size_t i=0; i < subTypes.size(); ++i) {
-        if (i != 0) {
-          result += ",";
-        }
-        if (isUnquotedFieldName(fieldNames[i])) {
-          result += fieldNames[i];
-        } else {
-          std::string name(fieldNames[i]);
-          size_t pos = 0;
-          while ((pos = name.find("`", pos)) != std::string::npos) {
-            name.replace(pos, 1, "``");
-            pos += 2;
+      case BOOLEAN:
+        return "boolean";
+      case BYTE:
+        return "tinyint";
+      case SHORT:
+        return "smallint";
+      case INT:
+        return "int";
+      case LONG:
+        return "bigint";
+      case FLOAT:
+        return "float";
+      case DOUBLE:
+        return "double";
+      case STRING:
+        return "string";
+      case BINARY:
+        return "binary";
+      case TIMESTAMP:
+        return "timestamp";
+      case TIMESTAMP_INSTANT:
+        return "timestamp with local time zone";
+      case LIST:
+        return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">";
+      case MAP:
+        return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," +
+               (subTypes[1] ? subTypes[1]->toString() : "void") + ">";
+      case STRUCT: {
+        std::string result = "struct<";
+        for (size_t i = 0; i < subTypes.size(); ++i) {
+          if (i != 0) {
+            result += ",";
           }
-          result += "`";
-          result += name;
-          result += "`";
+          if (isUnquotedFieldName(fieldNames[i])) {
+            result += fieldNames[i];
+          } else {
+            std::string name(fieldNames[i]);
+            size_t pos = 0;
+            while ((pos = name.find("`", pos)) != std::string::npos) {
+              name.replace(pos, 1, "``");
+              pos += 2;
+            }
+            result += "`";
+            result += name;
+            result += "`";
+          }
+          result += ":";
+          result += subTypes[i]->toString();
         }
-        result += ":";
-        result += subTypes[i]->toString();
+        result += ">";
+        return result;
       }
-      result += ">";
-      return result;
-    }
-    case UNION: {
-      std::string result = "uniontype<";
-      for(size_t i=0; i < subTypes.size(); ++i) {
-        if (i != 0) {
-          result += ",";
+      case UNION: {
+        std::string result = "uniontype<";
+        for (size_t i = 0; i < subTypes.size(); ++i) {
+          if (i != 0) {
+            result += ",";
+          }
+          result += subTypes[i]->toString();
         }
-        result += subTypes[i]->toString();
+        result += ">";
+        return result;
       }
-      result += ">";
-      return result;
-    }
-    case DECIMAL: {
-      std::stringstream result;
-      result << "decimal(" << precision << "," << scale << ")";
-      return result.str();
-    }
-    case DATE:
-      return "date";
-    case VARCHAR: {
-      std::stringstream result;
-      result << "varchar(" << maxLength << ")";
-      return result.str();
-    }
-    case CHAR: {
-      std::stringstream result;
-      result << "char(" << maxLength << ")";
-      return result.str();
-    }
-    default:
-      throw NotImplementedYet("Unknown type");
+      case DECIMAL: {
+        std::stringstream result;
+        result << "decimal(" << precision << "," << scale << ")";
+        return result.str();
+      }
+      case DATE:
+        return "date";
+      case VARCHAR: {
+        std::stringstream result;
+        result << "varchar(" << maxLength << ")";
+        return result.str();
+      }
+      case CHAR: {
+        std::stringstream result;
+        result << "char(" << maxLength << ")";
+        return result.str();
+      }
+      default:
+        throw NotImplementedYet("Unknown type");
     }
   }
 
-  std::unique_ptr<ColumnVectorBatch>
-  TypeImpl::createRowBatch(uint64_t capacity,
-                           MemoryPool& memoryPool,
-                           bool encoded) const {
+  std::unique_ptr<ColumnVectorBatch> TypeImpl::createRowBatch(uint64_t capacity,
+                                                              MemoryPool& memoryPool,
+                                                              bool encoded) const {
+    return createRowBatch(capacity, memoryPool, encoded, /*useTightNumericVector=*/false);
+  }
+
+  std::unique_ptr<ColumnVectorBatch> TypeImpl::createRowBatch(uint64_t capacity,
+                                                              MemoryPool& memoryPool, bool encoded,
+                                                              bool useTightNumericVector) const {
     switch (static_cast<int64_t>(kind)) {
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-    case DATE:
-      return std::unique_ptr<ColumnVectorBatch>
-        (new LongVectorBatch(capacity, memoryPool));
-
-    case FLOAT:
-    case DOUBLE:
-      return std::unique_ptr<ColumnVectorBatch>
-        (new DoubleVectorBatch(capacity, memoryPool));
-
-    case STRING:
-    case BINARY:
-    case CHAR:
-    case VARCHAR:
-      return encoded ?
-      std::unique_ptr<ColumnVectorBatch>
-        (new EncodedStringVectorBatch(capacity, memoryPool))
-      : std::unique_ptr<ColumnVectorBatch>
-        (new StringVectorBatch(capacity, memoryPool));
-
-    case TIMESTAMP:
-    case TIMESTAMP_INSTANT:
-      return std::unique_ptr<ColumnVectorBatch>
-        (new TimestampVectorBatch(capacity, memoryPool));
-
-    case STRUCT: {
-      StructVectorBatch *result = new StructVectorBatch(capacity, memoryPool);
-      std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result);
-      for(uint64_t i=0; i < getSubtypeCount(); ++i) {
-          result->fields.push_back(getSubtype(i)->
-                                   createRowBatch(capacity,
-                                                  memoryPool, encoded).release());
+      case BOOLEAN:
+        if (useTightNumericVector) {
+          return std::make_unique<ByteVectorBatch>(capacity, memoryPool);
+        }
+        return std::make_unique<LongVectorBatch>(capacity, memoryPool);
+      case BYTE:
+        if (useTightNumericVector) {
+          return std::make_unique<ByteVectorBatch>(capacity, memoryPool);
+        }
+        return std::make_unique<LongVectorBatch>(capacity, memoryPool);
+      case SHORT:
+        if (useTightNumericVector) {
+          return std::make_unique<ShortVectorBatch>(capacity, memoryPool);
+        }
+        return std::make_unique<LongVectorBatch>(capacity, memoryPool);
+      case INT:
+        if (useTightNumericVector) {
+          return std::make_unique<IntVectorBatch>(capacity, memoryPool);
+        }
+        return std::make_unique<LongVectorBatch>(capacity, memoryPool);
+      case LONG:
+      case DATE:
+        return std::make_unique<LongVectorBatch>(capacity, memoryPool);
+
+      case FLOAT:
+        if (useTightNumericVector) {
+          return std::make_unique<FloatVectorBatch>(capacity, memoryPool);
+        }
+        return std::make_unique<DoubleVectorBatch>(capacity, memoryPool);
+      case DOUBLE:
+        return std::make_unique<DoubleVectorBatch>(capacity, memoryPool);
+
+      case STRING:
+      case BINARY:
+      case CHAR:
+      case VARCHAR:
+        return encoded ? std::make_unique<EncodedStringVectorBatch>(capacity, memoryPool)
+                       : std::make_unique<StringVectorBatch>(capacity, memoryPool);
+
+      case TIMESTAMP:
+      case TIMESTAMP_INSTANT:
+        return std::make_unique<TimestampVectorBatch>(capacity, memoryPool);
+
+      case STRUCT: {
+        auto result = std::make_unique<StructVectorBatch>(capacity, memoryPool);
+        for (uint64_t i = 0; i < getSubtypeCount(); ++i) {
+          result->fields.push_back(
+              getSubtype(i)
+                  ->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector)
+                  .release());
+        }
+        return result;
       }
-      return return_value;
-    }
 
-    case LIST: {
-      ListVectorBatch* result = new ListVectorBatch(capacity, memoryPool);
-      std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result);
-      if (getSubtype(0) != nullptr) {
-        result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded);
+      case LIST: {
+        auto result = std::make_unique<ListVectorBatch>(capacity, memoryPool);
+        if (getSubtype(0) != nullptr) {
+          result->elements =
+              getSubtype(0)->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector);
+        }
+        return result;
       }
-      return return_value;
-    }
 
-    case MAP: {
-      MapVectorBatch* result = new MapVectorBatch(capacity, memoryPool);
-      std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result);
-      if (getSubtype(0) != nullptr) {
-        result->keys = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded);
-      }
-      if (getSubtype(1) != nullptr) {
-        result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool, encoded);
+      case MAP: {
+        auto result = std::make_unique<MapVectorBatch>(capacity, memoryPool);
+        if (getSubtype(0) != nullptr) {
+          result->keys =
+              getSubtype(0)->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector);
+        }
+        if (getSubtype(1) != nullptr) {
+          result->elements =
+              getSubtype(1)->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector);
+        }
+        return result;
       }
-      return return_value;
-    }
 
-    case DECIMAL: {
-      if (getPrecision() == 0 || getPrecision() > 18) {
-        return std::unique_ptr<ColumnVectorBatch>
-          (new Decimal128VectorBatch(capacity, memoryPool));
-      } else {
-        return std::unique_ptr<ColumnVectorBatch>
-          (new Decimal64VectorBatch(capacity, memoryPool));
+      case DECIMAL: {
+        if (getPrecision() == 0 || getPrecision() > 18) {
+          return std::make_unique<Decimal128VectorBatch>(capacity, memoryPool);
+        } else {
+          return std::make_unique<Decimal64VectorBatch>(capacity, memoryPool);
+        }
       }
-    }
 
-    case UNION: {
-      UnionVectorBatch *result = new UnionVectorBatch(capacity, memoryPool);
-      std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result);
-      for(uint64_t i=0; i < getSubtypeCount(); ++i) {
-          result->children.push_back(getSubtype(i)->createRowBatch(capacity,
-                                                                   memoryPool, encoded)
-                                     .release());
+      case UNION: {
+        auto result = std::make_unique<UnionVectorBatch>(capacity, memoryPool);
+        for (uint64_t i = 0; i < getSubtypeCount(); ++i) {
+          result->children.push_back(
+              getSubtype(i)
+                  ->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector)
+                  .release());
+        }
+        return result;
       }
-      return return_value;
-    }
 
-    default:
-      throw NotImplementedYet("not supported yet");
+      default:
+        throw NotImplementedYet("not supported yet");
     }
   }
 
   std::unique_ptr<Type> createPrimitiveType(TypeKind kind) {
-    return std::unique_ptr<Type>(new TypeImpl(kind));
+    return std::make_unique<TypeImpl>(kind);
   }
 
-  std::unique_ptr<Type> createCharType(TypeKind kind,
-                                       uint64_t maxLength) {
-    return std::unique_ptr<Type>(new TypeImpl(kind, maxLength));
+  std::unique_ptr<Type> createCharType(TypeKind kind, uint64_t maxLength) {
+    return std::make_unique<TypeImpl>(kind, maxLength);
   }
 
-  std::unique_ptr<Type> createDecimalType(uint64_t precision,
-                                          uint64_t scale) {
-    return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale));
+  std::unique_ptr<Type> createDecimalType(uint64_t precision, uint64_t scale) {
+    return std::make_unique<TypeImpl>(DECIMAL, precision, scale);
   }
 
   std::unique_ptr<Type> createStructType() {
-    return std::unique_ptr<Type>(new TypeImpl(STRUCT));
+    return std::make_unique<TypeImpl>(STRUCT);
   }
 
   std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) {
-    TypeImpl* result = new TypeImpl(LIST);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result);
+    auto result = std::make_unique<TypeImpl>(LIST);
     result->addChildType(std::move(elements));
-    return return_value;
+    return result;
   }
 
-  std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key,
-                                      std::unique_ptr<Type> value) {
-    TypeImpl* result = new TypeImpl(MAP);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result);
+  std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, std::unique_ptr<Type> value) {
+    auto result = std::make_unique<TypeImpl>(MAP);
     result->addChildType(std::move(key));
     result->addChildType(std::move(value));
-    return return_value;
+    return result;
   }
 
   std::unique_ptr<Type> createUnionType() {
-    return std::unique_ptr<Type>(new TypeImpl(UNION));
+    return std::make_unique<TypeImpl>(UNION);
   }
 
   std::string printProtobufMessage(const google::protobuf::Message& message);
-  std::unique_ptr<Type> convertType(const proto::Type& type,
-                                    const proto::Footer& footer) {
+  std::unique_ptr<Type> convertType(const proto::Type& type, const proto::Footer& footer) {
     std::unique_ptr<Type> ret;
     switch (static_cast<int64_t>(type.kind())) {
-
-    case proto::Type_Kind_BOOLEAN:
-    case proto::Type_Kind_BYTE:
-    case proto::Type_Kind_SHORT:
-    case proto::Type_Kind_INT:
-    case proto::Type_Kind_LONG:
-    case proto::Type_Kind_FLOAT:
-    case proto::Type_Kind_DOUBLE:
-    case proto::Type_Kind_STRING:
-    case proto::Type_Kind_BINARY:
-    case proto::Type_Kind_TIMESTAMP:
-    case proto::Type_Kind_TIMESTAMP_INSTANT:
-    case proto::Type_Kind_DATE:
-      ret = std::unique_ptr<Type>
-        (new TypeImpl(static_cast<TypeKind>(type.kind())));
-      break;
-
-    case proto::Type_Kind_CHAR:
-    case proto::Type_Kind_VARCHAR:
-      ret = std::unique_ptr<Type>
-        (new TypeImpl(static_cast<TypeKind>(type.kind()),
-                      type.maximumlength()));
-      break;
-
-    case proto::Type_Kind_DECIMAL:
-      ret = std::unique_ptr<Type>
-        (new TypeImpl(DECIMAL, type.precision(), type.scale()));
-      break;
-
-    case proto::Type_Kind_LIST:
-    case proto::Type_Kind_MAP:
-    case proto::Type_Kind_UNION: {
-      TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind()));
-      ret = std::unique_ptr<Type>(result);
-      if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1)
-        throw ParseError("Illegal LIST type that doesn't contain one subtype");
-      if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2)
-        throw ParseError("Illegal MAP type that doesn't contain two subtypes");
-      if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0)
-        throw ParseError("Illegal UNION type that doesn't contain any subtypes");
-      for(int i=0; i < type.subtypes_size(); ++i) {
-        result->addUnionChild(convertType(footer.types(static_cast<int>
-                                                       (type.subtypes(i))),
-                                          footer));
+      case proto::Type_Kind_BOOLEAN:
+      case proto::Type_Kind_BYTE:
+      case proto::Type_Kind_SHORT:
+      case proto::Type_Kind_INT:
+      case proto::Type_Kind_LONG:
+      case proto::Type_Kind_FLOAT:
+      case proto::Type_Kind_DOUBLE:
+      case proto::Type_Kind_STRING:
+      case proto::Type_Kind_BINARY:
+      case proto::Type_Kind_TIMESTAMP:
+      case proto::Type_Kind_TIMESTAMP_INSTANT:
+      case proto::Type_Kind_DATE:
+        ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()));
+        break;
+
+      case proto::Type_Kind_CHAR:
+      case proto::Type_Kind_VARCHAR:
+        ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()), type.maximum_length());
+        break;
+
+      case proto::Type_Kind_DECIMAL:
+        ret = std::make_unique<TypeImpl>(DECIMAL, type.precision(), type.scale());
+        break;
+
+      case proto::Type_Kind_LIST:
+      case proto::Type_Kind_MAP:
+      case proto::Type_Kind_UNION: {
+        ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()));
+        if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1)
+          throw ParseError("Illegal LIST type that doesn't contain one subtype");
+        if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2)
+          throw ParseError("Illegal MAP type that doesn't contain two subtypes");
+        if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0)
+          throw ParseError("Illegal UNION type that doesn't contain any subtypes");
+        for (int i = 0; i < type.subtypes_size(); ++i) {
+          ret->addUnionChild(convertType(footer.types(static_cast<int>(type.subtypes(i))), footer));
+        }
+        break;
       }
-      break;
-    }
-
-    case proto::Type_Kind_STRUCT: {
-      TypeImpl* result = new TypeImpl(STRUCT);
-      ret = std::unique_ptr<Type>(result);
-      if (type.subtypes_size() > type.fieldnames_size())
-        throw ParseError("Illegal STRUCT type that contains less fieldnames than subtypes");
-      for(int i=0; i < type.subtypes_size(); ++i) {
-        result->addStructField(type.fieldnames(i),
-                               convertType(footer.types(static_cast<int>
-                                                        (type.subtypes(i))),
-                                           footer));
+
+      case proto::Type_Kind_STRUCT: {
+        ret = std::make_unique<TypeImpl>(STRUCT);
+        if (type.subtypes_size() > type.field_names_size())
+          throw ParseError("Illegal STRUCT type that contains less field_names than subtypes");
+        for (int i = 0; i < type.subtypes_size(); ++i) {
+          ret->addStructField(
+              type.field_names(i),
+              convertType(footer.types(static_cast<int>(type.subtypes(i))), footer));
+        }
+        break;
       }
-      break;
-    }
-    default:
-      throw NotImplementedYet("Unknown type kind");
+      default:
+        throw NotImplementedYet("Unknown type kind");
     }
     for (int i = 0; i < type.attributes_size(); ++i) {
       const auto& attribute = type.attributes(i);
@@ -493,143 +492,126 @@ namespace orc {
    * @param selected is each column by id selected
    * @return a clone of the fileType filtered by the selection array
    */
-  std::unique_ptr<Type> buildSelectedType(const Type *fileType,
-                                          const std::vector<bool>& selected) {
+  std::unique_ptr<Type> buildSelectedType(const Type* fileType, const std::vector<bool>& selected) {
     if (fileType == nullptr || !selected[fileType->getColumnId()]) {
-      return std::unique_ptr<Type>();
+      return nullptr;
     }
 
-    TypeImpl* result;
+    std::unique_ptr<TypeImpl> result;
     switch (static_cast<int>(fileType->getKind())) {
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-    case FLOAT:
-    case DOUBLE:
-    case STRING:
-    case BINARY:
-    case TIMESTAMP:
-    case TIMESTAMP_INSTANT:
-    case DATE:
-      result = new TypeImpl(fileType->getKind());
-      break;
-
-    case DECIMAL:
-      result= new TypeImpl(fileType->getKind(),
-                           fileType->getPrecision(), fileType->getScale());
-      break;
-
-    case VARCHAR:
-    case CHAR:
-      result = new TypeImpl(fileType->getKind(), fileType->getMaximumLength());
-      break;
-
-    case LIST:
-      result = new TypeImpl(fileType->getKind());
-      result->addChildType(buildSelectedType(fileType->getSubtype(0),
-                                             selected));
-      break;
-
-    case MAP:
-      result = new TypeImpl(fileType->getKind());
-      result->addChildType(buildSelectedType(fileType->getSubtype(0),
-                                             selected));
-      result->addChildType(buildSelectedType(fileType->getSubtype(1),
-                                             selected));
-      break;
-
-    case STRUCT: {
-      result = new TypeImpl(fileType->getKind());
-      for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
-        std::unique_ptr<Type> childType =
-          buildSelectedType(fileType->getSubtype(child), selected);
-        if (childType.get() != nullptr) {
-          result->addStructField(fileType->getFieldName(child),
-                                 std::move(childType));
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+      case STRING:
+      case BINARY:
+      case TIMESTAMP:
+      case TIMESTAMP_INSTANT:
+      case DATE:
+        result = std::make_unique<TypeImpl>(fileType->getKind());
+        break;
+
+      case DECIMAL:
+        result = std::make_unique<TypeImpl>(fileType->getKind(), fileType->getPrecision(),
+                                            fileType->getScale());
+        break;
+
+      case VARCHAR:
+      case CHAR:
+        result = std::make_unique<TypeImpl>(fileType->getKind(), fileType->getMaximumLength());
+        break;
+
+      case LIST:
+        result = std::make_unique<TypeImpl>(fileType->getKind());
+        result->addChildType(buildSelectedType(fileType->getSubtype(0), selected));
+        break;
+
+      case MAP:
+        result = std::make_unique<TypeImpl>(fileType->getKind());
+        result->addChildType(buildSelectedType(fileType->getSubtype(0), selected));
+        result->addChildType(buildSelectedType(fileType->getSubtype(1), selected));
+        break;
+
+      case STRUCT: {
+        result = std::make_unique<TypeImpl>(fileType->getKind());
+        for (uint64_t child = 0; child < fileType->getSubtypeCount(); ++child) {
+          std::unique_ptr<Type> childType =
+              buildSelectedType(fileType->getSubtype(child), selected);
+          if (childType.get() != nullptr) {
+            result->addStructField(fileType->getFieldName(child), std::move(childType));
+          }
         }
+        break;
       }
-      break;
-    }
 
-    case UNION: {
-      result = new TypeImpl(fileType->getKind());
-      for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
-        std::unique_ptr<Type> childType =
-          buildSelectedType(fileType->getSubtype(child), selected);
-        if (childType.get() != nullptr) {
-          result->addUnionChild(std::move(childType));
+      case UNION: {
+        result = std::make_unique<TypeImpl>(fileType->getKind());
+        for (uint64_t child = 0; child < fileType->getSubtypeCount(); ++child) {
+          std::unique_ptr<Type> childType =
+              buildSelectedType(fileType->getSubtype(child), selected);
+          if (childType.get() != nullptr) {
+            result->addUnionChild(std::move(childType));
+          }
         }
+        break;
       }
-      break;
-    }
 
-    default:
-      throw NotImplementedYet("Unknown type kind");
+      default:
+        throw NotImplementedYet("Unknown type kind");
     }
     result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId());
     for (auto& key : fileType->getAttributeKeys()) {
       const auto& value = fileType->getAttributeValue(key);
       result->setAttribute(key, value);
     }
-    return std::unique_ptr<Type>(result);
+    return result;
   }
 
-  ORC_UNIQUE_PTR<Type> Type::buildTypeFromString(const std::string& input) {
+  std::unique_ptr<Type> Type::buildTypeFromString(const std::string& input) {
     size_t size = input.size();
-    std::pair<ORC_UNIQUE_PTR<Type>, size_t> res =
-      TypeImpl::parseType(input, 0, size);
+    std::pair<std::unique_ptr<Type>, size_t> res = TypeImpl::parseType(input, 0, size);
     if (res.second != size) {
       throw std::logic_error("Invalid type string.");
     }
     return std::move(res.first);
   }
 
-  std::unique_ptr<Type> TypeImpl::parseArrayType(const std::string &input,
-                                                 size_t start,
+  std::unique_ptr<Type> TypeImpl::parseArrayType(const std::string& input, size_t start,
                                                  size_t end) {
-    TypeImpl* arrayType = new TypeImpl(LIST);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(arrayType);
+    auto result = std::make_unique<TypeImpl>(LIST);
     if (input[start] != '<') {
       throw std::logic_error("Missing < after array.");
     }
-    std::pair<ORC_UNIQUE_PTR<Type>, size_t> res =
-      TypeImpl::parseType(input, start + 1, end);
+    std::pair<std::unique_ptr<Type>, size_t> res = TypeImpl::parseType(input, start + 1, end);
     if (res.second != end) {
-      throw std::logic_error(
-        "Array type must contain exactly one sub type.");
+      throw std::logic_error("Array type must contain exactly one sub type.");
     }
-    arrayType->addChildType(std::move(res.first));
-    return return_value;
+    result->addChildType(std::move(res.first));
+    return result;
   }
 
-  std::unique_ptr<Type> TypeImpl::parseMapType(const std::string &input,
-                                               size_t start,
-                                               size_t end) {
-    TypeImpl* mapType = new TypeImpl(MAP);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(mapType);
+  std::unique_ptr<Type> TypeImpl::parseMapType(const std::string& input, size_t start, size_t end) {
+    auto result = std::make_unique<TypeImpl>(MAP);
     if (input[start] != '<') {
       throw std::logic_error("Missing < after map.");
     }
-    std::pair<ORC_UNIQUE_PTR<Type>, size_t> key =
-      TypeImpl::parseType(input, start + 1, end);
+    std::pair<std::unique_ptr<Type>, size_t> key = TypeImpl::parseType(input, start + 1, end);
     if (input[key.second] != ',') {
       throw std::logic_error("Missing comma after key.");
     }
-    std::pair<ORC_UNIQUE_PTR<Type>, size_t> val =
-      TypeImpl::parseType(input, key.second + 1, end);
+    std::pair<std::unique_ptr<Type>, size_t> val = TypeImpl::parseType(input, key.second + 1, end);
     if (val.second != end) {
-      throw std::logic_error(
-        "Map type must contain exactly two sub types.");
+      throw std::logic_error("Map type must contain exactly two sub types.");
     }
-    mapType->addChildType(std::move(key.first));
-    mapType->addChildType(std::move(val.first));
-    return return_value;
+    result->addChildType(std::move(key.first));
+    result->addChildType(std::move(val.first));
+    return result;
   }
 
-  std::pair<std::string, size_t> TypeImpl::parseName(const std::string &input,
-                                                     const size_t start,
+  std::pair<std::string, size_t> TypeImpl::parseName(const std::string& input, const size_t start,
                                                      const size_t end) {
     size_t pos = start;
     if (input[pos] == '`') {
@@ -638,7 +620,7 @@ namespace orc {
       while (pos < end) {
         char ch = input[++pos];
         if (ch == '`') {
-          if (pos < end && input[pos+1] == '`') {
+          if (pos < end && input[pos + 1] == '`') {
             ++pos;
             oss.put('`');
           } else {
@@ -667,11 +649,9 @@ namespace orc {
     }
   }
 
-  std::unique_ptr<Type> TypeImpl::parseStructType(const std::string &input,
-                                                  size_t start,
+  std::unique_ptr<Type> TypeImpl::parseStructType(const std::string& input, size_t start,
                                                   size_t end) {
-    TypeImpl* structType = new TypeImpl(STRUCT);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(structType);
+    auto result = std::make_unique<TypeImpl>(STRUCT);
     size_t pos = start + 1;
     if (input[start] != '<') {
       throw std::logic_error("Missing < after struct.");
@@ -682,9 +662,8 @@ namespace orc {
       if (input[pos] != ':') {
         throw std::logic_error("Invalid struct type. No field name set.");
       }
-      std::pair<ORC_UNIQUE_PTR<Type>, size_t> typeRes =
-        TypeImpl::parseType(input, ++pos, end);
-      structType->addStructField(nameRes.first, std::move(typeRes.first));
+      std::pair<std::unique_ptr<Type>, size_t> typeRes = TypeImpl::parseType(input, ++pos, end);
+      result->addStructField(nameRes.first, std::move(typeRes.first));
       pos = typeRes.second;
       if (pos != end && input[pos] != ',') {
         throw std::logic_error("Missing comma after field.");
@@ -692,22 +671,19 @@ namespace orc {
       ++pos;
     }
 
-    return return_value;
+    return result;
   }
 
-  std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string &input,
-                                                 size_t start,
+  std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string& input, size_t start,
                                                  size_t end) {
-    TypeImpl* unionType = new TypeImpl(UNION);
-    std::unique_ptr<Type> return_value = std::unique_ptr<Type>(unionType);
+    auto result = std::make_unique<TypeImpl>(UNION);
     size_t pos = start + 1;
     if (input[start] != '<') {
       throw std::logic_error("Missing < after uniontype.");
     }
     while (pos < end) {
-      std::pair<ORC_UNIQUE_PTR<Type>, size_t> res =
-        TypeImpl::parseType(input, pos, end);
-      unionType->addChildType(std::move(res.first));
+      std::pair<std::unique_ptr<Type>, size_t> res = TypeImpl::parseType(input, pos, end);
+      result->addChildType(std::move(res.first));
       pos = res.second;
       if (pos != end && input[pos] != ',') {
         throw std::logic_error("Missing comma after union sub type.");
@@ -715,11 +691,10 @@ namespace orc {
       ++pos;
     }
 
-    return return_value;
+    return result;
   }
 
-  std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string &input,
-                                                   size_t start,
+  std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string& input, size_t start,
                                                    size_t end) {
     if (input[start] != '(') {
       throw std::logic_error("Missing ( after decimal.");
@@ -729,61 +704,54 @@ namespace orc {
     if (sep + 1 >= end || sep == std::string::npos) {
       throw std::logic_error("Decimal type must specify precision and scale.");
     }
-    uint64_t precision =
-      static_cast<uint64_t>(atoi(input.substr(pos, sep - pos).c_str()));
-    uint64_t scale =
-      static_cast<uint64_t>(atoi(input.substr(sep + 1, end - sep - 1).c_str()));
-    return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale));
+    uint64_t precision = static_cast<uint64_t>(atoi(input.substr(pos, sep - pos).c_str()));
+    uint64_t scale = static_cast<uint64_t>(atoi(input.substr(sep + 1, end - sep - 1).c_str()));
+    return std::make_unique<TypeImpl>(DECIMAL, precision, scale);
   }
 
-  void validatePrimitiveType(std::string category,
-                             const std::string &input,
-                             const size_t pos) {
+  void validatePrimitiveType(std::string category, const std::string& input, const size_t pos) {
     if (input[pos] == '<' || input[pos] == '(') {
       std::ostringstream oss;
-      oss << "Invalid " << input[pos] << " after "
-        << category << " type.";
+      oss << "Invalid " << input[pos] << " after " << category << " type.";
       throw std::logic_error(oss.str());
     }
   }
 
-  std::unique_ptr<Type> TypeImpl::parseCategory(std::string category,
-                                                const std::string &input,
-                                                size_t start,
-                                                size_t end) {
+  std::unique_ptr<Type> TypeImpl::parseCategory(std::string category, const std::string& input,
+                                                size_t start, size_t end) {
     if (category == "boolean") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(BOOLEAN));
+      return std::make_unique<TypeImpl>(BOOLEAN);
     } else if (category == "tinyint") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(BYTE));
+      return std::make_unique<TypeImpl>(BYTE);
     } else if (category == "smallint") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(SHORT));
+      return std::make_unique<TypeImpl>(SHORT);
     } else if (category == "int") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(INT));
+      return std::make_unique<TypeImpl>(INT);
     } else if (category == "bigint") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(LONG));
+      return std::make_unique<TypeImpl>(LONG);
     } else if (category == "float") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(FLOAT));
+      return std::make_unique<TypeImpl>(FLOAT);
     } else if (category == "double") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(DOUBLE));
+      return std::make_unique<TypeImpl>(DOUBLE);
     } else if (category == "string") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(STRING));
+      return std::make_unique<TypeImpl>(STRING);
     } else if (category == "binary") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(BINARY));
+      return std::make_unique<TypeImpl>(BINARY);
     } else if (category == "timestamp") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(TIMESTAMP));
+      return std::make_unique<TypeImpl>(TIMESTAMP);
     } else if (category == "timestamp with local time zone") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(TIMESTAMP_INSTANT));
+      return std::make_unique<TypeImpl>(TIMESTAMP_INSTANT);
     } else if (category == "array") {
       return parseArrayType(input, start, end);
     } else if (category == "map") {
@@ -796,27 +764,28 @@ namespace orc {
       return parseDecimalType(input, start, end);
     } else if (category == "date") {
       validatePrimitiveType(category, input, start);
-      return std::unique_ptr<Type>(new TypeImpl(DATE));
+      return std::make_unique<TypeImpl>(DATE);
     } else if (category == "varchar") {
       if (input[start] != '(') {
         throw std::logic_error("Missing ( after varchar.");
       }
-      uint64_t maxLength = static_cast<uint64_t>(
-        atoi(input.substr(start + 1, end - start + 1).c_str()));
-      return std::unique_ptr<Type>(new TypeImpl(VARCHAR, maxLength));
+      uint64_t maxLength =
+          static_cast<uint64_t>(atoi(input.substr(start + 1, end - start + 1).c_str()));
+      return std::make_unique<TypeImpl>(VARCHAR, maxLength);
     } else if (category == "char") {
       if (input[start] != '(') {
         throw std::logic_error("Missing ( after char.");
       }
-      uint64_t maxLength = static_cast<uint64_t>(
-        atoi(input.substr(start + 1, end - start + 1).c_str()));
-      return std::unique_ptr<Type>(new TypeImpl(CHAR, maxLength));
+      uint64_t maxLength =
+          static_cast<uint64_t>(atoi(input.substr(start + 1, end - start + 1).c_str()));
+      return std::make_unique<TypeImpl>(CHAR, maxLength);
     } else {
       throw std::logic_error("Unknown type " + category);
     }
   }
 
-  std::pair<ORC_UNIQUE_PTR<Type>, size_t> TypeImpl::parseType(const std::string &input, size_t start, size_t end) {
+  std::pair<std::unique_ptr<Type>, size_t> TypeImpl::parseType(const std::string& input,
+                                                               size_t start, size_t end) {
     size_t pos = start;
     while (pos < end && (isalpha(input[pos]) || input[pos] == ' ')) {
       ++pos;
@@ -854,4 +823,18 @@ namespace orc {
     return std::make_pair(parseCategory(category, input, pos, nextPos), endPos);
   }
 
-}
+  const Type* TypeImpl::getTypeByColumnId(uint64_t colIdx) const {
+    if (getColumnId() == colIdx) {
+      return this;
+    }
+
+    for (uint64_t i = 0; i != getSubtypeCount(); ++i) {
+      const Type* ret = getSubtype(i)->getTypeByColumnId(colIdx);
+      if (ret != nullptr) {
+        return ret;
+      }
+    }
+    return nullptr;
+  }
+
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/TypeImpl.hh b/contrib/libs/apache/orc/c++/src/TypeImpl.hh
index 88c4737d18..6d0743793a 100644
--- a/contrib/libs/apache/orc/c++/src/TypeImpl.hh
+++ b/contrib/libs/apache/orc/c++/src/TypeImpl.hh
@@ -28,8 +28,8 @@
 
 namespace orc {
 
-  class TypeImpl: public Type {
-  private:
+  class TypeImpl : public Type {
+   private:
     TypeImpl* parent;
     mutable int64_t columnId;
     mutable int64_t maximumColumnId;
@@ -42,7 +42,7 @@ namespace orc {
     uint64_t scale;
     std::map<std::string, std::string> attributes;
 
-  public:
+   public:
     /**
      * Create most of the primitive types.
      */
@@ -56,8 +56,7 @@ namespace orc {
     /**
      * Create decimal type.
      */
-    TypeImpl(TypeKind kind, uint64_t precision,
-             uint64_t scale);
+    TypeImpl(TypeKind kind, uint64_t precision, uint64_t scale);
 
     uint64_t getColumnId() const override;
 
@@ -77,8 +76,7 @@ namespace orc {
 
     uint64_t getScale() const override;
 
-    Type& setAttribute(const std::string& key,
-                       const std::string& value) override;
+    Type& setAttribute(const std::string& key, const std::string& value) override;
 
     bool hasAttributeKey(const std::string& key) const override;
 
@@ -90,14 +88,16 @@ namespace orc {
 
     std::string toString() const override;
 
-    Type* addStructField(const std::string& fieldName,
-                         std::unique_ptr<Type> fieldType) override;
+    const Type* getTypeByColumnId(uint64_t colIdx) const override;
+    Type* addStructField(const std::string& fieldName, std::unique_ptr<Type> fieldType) override;
     Type* addUnionChild(std::unique_ptr<Type> fieldType) override;
 
-    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size,
-                                                      MemoryPool& memoryPool,
-                                                      bool encoded = false
-                                                      ) const override;
+    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size, MemoryPool& memoryPool,
+                                                      bool encoded = false) const override;
+
+    std::unique_ptr<ColumnVectorBatch> createRowBatch(
+        uint64_t size, MemoryPool& memoryPool, bool encoded = false,
+        bool useTightNumericVector = false) const override;
 
     /**
      * Explicitly set the column ids. Only for internal usage.
@@ -109,12 +109,10 @@ namespace orc {
      */
     void addChildType(std::unique_ptr<Type> childType);
 
-    static std::pair<ORC_UNIQUE_PTR<Type>, size_t> parseType(
-      const std::string &input,
-      size_t start,
-      size_t end);
+    static std::pair<std::unique_ptr<Type>, size_t> parseType(const std::string& input,
+                                                              size_t start, size_t end);
 
-  private:
+   private:
     /**
      * Assign ids to this node and its children giving this
      * node rootId.
@@ -133,9 +131,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseArrayType(const std::string &input,
-                                                size_t start,
-                                                size_t end);
+    static std::unique_ptr<Type> parseArrayType(const std::string& input, size_t start, size_t end);
 
     /**
      * Parse map type from string
@@ -143,9 +139,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseMapType(const std::string &input,
-                                              size_t start,
-                                              size_t end);
+    static std::unique_ptr<Type> parseMapType(const std::string& input, size_t start, size_t end);
 
     /**
      * Parse field name from string
@@ -153,8 +147,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::pair<std::string, size_t> parseName(const std::string &input,
-                                                    const size_t start,
+    static std::pair<std::string, size_t> parseName(const std::string& input, const size_t start,
                                                     const size_t end);
 
     /**
@@ -163,8 +156,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseStructType(const std::string &input,
-                                                 size_t start,
+    static std::unique_ptr<Type> parseStructType(const std::string& input, size_t start,
                                                  size_t end);
 
     /**
@@ -173,9 +165,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseUnionType(const std::string &input,
-                                                size_t start,
-                                                size_t end);
+    static std::unique_ptr<Type> parseUnionType(const std::string& input, size_t start, size_t end);
 
     /**
      * Parse decimal type from string
@@ -183,8 +173,7 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseDecimalType(const std::string &input,
-                                                  size_t start,
+    static std::unique_ptr<Type> parseDecimalType(const std::string& input, size_t start,
                                                   size_t end);
 
     /**
@@ -194,14 +183,11 @@ namespace orc {
      * @param start start position of the input string
      * @param end end position of the input string
      */
-    static std::unique_ptr<Type> parseCategory(std::string category,
-                                               const std::string &input,
-                                               size_t start,
-                                               size_t end);
+    static std::unique_ptr<Type> parseCategory(std::string category, const std::string& input,
+                                               size_t start, size_t end);
   };
 
-  std::unique_ptr<Type> convertType(const proto::Type& type,
-                                    const proto::Footer& footer);
+  std::unique_ptr<Type> convertType(const proto::Type& type, const proto::Footer& footer);
 
   /**
    * Build a clone of the file type, projecting columns from the selected
@@ -211,8 +197,7 @@ namespace orc {
    * @param selected is each column by id selected
    * @return a clone of the fileType filtered by the selection array
    */
-  std::unique_ptr<Type> buildSelectedType(const Type *fileType,
-                                          const std::vector<bool>& selected);
-}
+  std::unique_ptr<Type> buildSelectedType(const Type* fileType, const std::vector<bool>& selected);
+}  // namespace orc
 
 #endif
diff --git a/contrib/libs/apache/orc/c++/src/Utils.hh b/contrib/libs/apache/orc/c++/src/Utils.hh
new file mode 100644
index 0000000000..751c09b205
--- /dev/null
+++ b/contrib/libs/apache/orc/c++/src/Utils.hh
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_UTILS_HH
+#define ORC_UTILS_HH
+
+#include <atomic>
+#include <chrono>
+
+namespace orc {
+
+  class AutoStopwatch {
+    std::chrono::high_resolution_clock::time_point start;
+    std::atomic<uint64_t>* latencyUs;
+    std::atomic<uint64_t>* count;
+    bool minus;
+
+   public:
+    AutoStopwatch(std::atomic<uint64_t>* _latencyUs, std::atomic<uint64_t>* _count,
+                  bool _minus = false)
+        : latencyUs(_latencyUs), count(_count), minus(_minus) {
+      if (latencyUs) {
+        start = std::chrono::high_resolution_clock::now();
+      }
+    }
+
+    ~AutoStopwatch() {
+      if (latencyUs) {
+        std::chrono::microseconds elapsedTime =
+            std::chrono::duration_cast<std::chrono::microseconds>(
+                std::chrono::high_resolution_clock::now() - start);
+        if (!minus) {
+          latencyUs->fetch_add(static_cast<uint64_t>(elapsedTime.count()));
+        } else {
+          latencyUs->fetch_sub(static_cast<uint64_t>(elapsedTime.count()));
+        }
+      }
+
+      if (count) {
+        count->fetch_add(1);
+      }
+    }
+  };
+
+#if ENABLE_METRICS
+#define SCOPED_STOPWATCH(METRICS_PTR, LATENCY_VAR, COUNT_VAR)                           \
+  AutoStopwatch measure((METRICS_PTR == nullptr ? nullptr : &METRICS_PTR->LATENCY_VAR), \
+                        (METRICS_PTR == nullptr ? nullptr : &METRICS_PTR->COUNT_VAR))
+
+#define SCOPED_MINUS_STOPWATCH(METRICS_PTR, LATENCY_VAR)                                         \
+  AutoStopwatch measure((METRICS_PTR == nullptr ? nullptr : &METRICS_PTR->LATENCY_VAR), nullptr, \
+                        true)
+#else
+#define SCOPED_STOPWATCH(METRICS_PTR, LATENCY_VAR, COUNT_VAR)
+#define SCOPED_MINUS_STOPWATCH(METRICS_PTR, LATENCY_VAR)
+#endif
+
+}  // namespace orc
+
+#endif
diff --git a/contrib/libs/apache/orc/c++/src/Vector.cc b/contrib/libs/apache/orc/c++/src/Vector.cc
index fefaaad4b1..b9e2854586 100644
--- a/contrib/libs/apache/orc/c++/src/Vector.cc
+++ b/contrib/libs/apache/orc/c++/src/Vector.cc
@@ -20,21 +20,21 @@
 
 #include "Adaptor.hh"
 #include "orc/Exceptions.hh"
+#include "orc/MemoryPool.hh"
 
+#include <cstdlib>
 #include <iostream>
 #include <sstream>
-#include <cstdlib>
 
 namespace orc {
 
-  ColumnVectorBatch::ColumnVectorBatch(uint64_t cap,
-                                       MemoryPool& pool
-                                       ): capacity(cap),
-                                          numElements(0),
-                                          notNull(pool, cap),
-                                          hasNulls(false),
-                                          isEncoded(false),
-                                          memoryPool(pool) {
+  ColumnVectorBatch::ColumnVectorBatch(uint64_t cap, MemoryPool& pool)
+      : capacity(cap),
+        numElements(0),
+        notNull(pool, cap),
+        hasNulls(false),
+        isEncoded(false),
+        memoryPool(pool) {
     std::memset(notNull.data(), 1, capacity);
   }
 
@@ -61,81 +61,13 @@ namespace orc {
     return false;
   }
 
-  LongVectorBatch::LongVectorBatch(uint64_t _capacity, MemoryPool& pool
-                     ): ColumnVectorBatch(_capacity, pool),
-                        data(pool, _capacity) {
-    // PASS
-  }
-
-  LongVectorBatch::~LongVectorBatch() {
-    // PASS
-  }
-
-  std::string LongVectorBatch::toString() const {
-    std::ostringstream buffer;
-    buffer << "Long vector <" << numElements << " of " << capacity << ">";
-    return buffer.str();
-  }
-
-  void LongVectorBatch::resize(uint64_t cap) {
-    if (capacity < cap) {
-      ColumnVectorBatch::resize(cap);
-      data.resize(cap);
-    }
-  }
-
-  void LongVectorBatch::clear() {
-    numElements = 0;
-  }
-
-  uint64_t LongVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage() +
-        static_cast<uint64_t>(data.capacity() * sizeof(int64_t));
-  }
-
-  DoubleVectorBatch::DoubleVectorBatch(uint64_t _capacity, MemoryPool& pool
-                   ): ColumnVectorBatch(_capacity, pool),
-                      data(pool, _capacity) {
-    // PASS
-  }
-
-  DoubleVectorBatch::~DoubleVectorBatch() {
-    // PASS
-  }
-
-  std::string DoubleVectorBatch::toString() const {
-    std::ostringstream buffer;
-    buffer << "Double vector <" << numElements << " of " << capacity << ">";
-    return buffer.str();
-  }
-
-  void DoubleVectorBatch::resize(uint64_t cap) {
-    if (capacity < cap) {
-      ColumnVectorBatch::resize(cap);
-      data.resize(cap);
-    }
-  }
-
-  void DoubleVectorBatch::clear() {
-    numElements = 0;
-  }
-
-  uint64_t DoubleVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-          + static_cast<uint64_t>(data.capacity() * sizeof(double));
-  }
-
   StringDictionary::StringDictionary(MemoryPool& pool)
-              : dictionaryBlob(pool),
-                dictionaryOffset(pool) {
+      : dictionaryBlob(pool), dictionaryOffset(pool) {
     // PASS
   }
 
-  EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity,
-                                                     MemoryPool& pool)
-                      : StringVectorBatch(_capacity, pool),
-                        dictionary(),
-                        index(pool, _capacity) {
+  EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity, MemoryPool& pool)
+      : StringVectorBatch(_capacity, pool), dictionary(), index(pool, _capacity) {
     // PASS
   }
 
@@ -156,11 +88,11 @@ namespace orc {
     }
   }
 
-  StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool
-               ): ColumnVectorBatch(_capacity, pool),
-                  data(pool, _capacity),
-                  length(pool, _capacity),
-                  blob(pool) {
+  StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool)
+      : ColumnVectorBatch(_capacity, pool),
+        data(pool, _capacity),
+        length(pool, _capacity),
+        blob(pool) {
     // PASS
   }
 
@@ -187,28 +119,27 @@ namespace orc {
   }
 
   uint64_t StringVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-          + static_cast<uint64_t>(data.capacity() * sizeof(char*)
-          + length.capacity() * sizeof(int64_t));
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>(data.capacity() * sizeof(char*) +
+                                 length.capacity() * sizeof(int64_t));
   }
 
-  StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool
-                                        ): ColumnVectorBatch(cap, pool) {
+  StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool) {
     // PASS
   }
 
   StructVectorBatch::~StructVectorBatch() {
-    for (uint64_t i=0; i<this->fields.size(); i++) {
+    for (uint64_t i = 0; i < this->fields.size(); i++) {
       delete this->fields[i];
     }
   }
 
   std::string StructVectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "Struct vector <" << numElements << " of " << capacity
-           << "; ";
-    for(std::vector<ColumnVectorBatch*>::const_iterator ptr=fields.begin();
-        ptr != fields.end(); ++ptr) {
+    buffer << "Struct vector <" << numElements << " of " << capacity << "; ";
+    for (std::vector<ColumnVectorBatch*>::const_iterator ptr = fields.begin(); ptr != fields.end();
+         ++ptr) {
       buffer << (*ptr)->toString() << "; ";
     }
     buffer << ">";
@@ -220,7 +151,7 @@ namespace orc {
   }
 
   void StructVectorBatch::clear() {
-    for(size_t i=0; i < fields.size(); i++) {
+    for (size_t i = 0; i < fields.size(); i++) {
       fields[i]->clear();
     }
     numElements = 0;
@@ -228,14 +159,14 @@ namespace orc {
 
   uint64_t StructVectorBatch::getMemoryUsage() {
     uint64_t memory = ColumnVectorBatch::getMemoryUsage();
-    for (unsigned int i=0; i < fields.size(); i++) {
+    for (unsigned int i = 0; i < fields.size(); i++) {
       memory += fields[i]->getMemoryUsage();
     }
     return memory;
   }
 
   bool StructVectorBatch::hasVariableLength() {
-    for (unsigned int i=0; i < fields.size(); i++) {
+    for (unsigned int i = 0; i < fields.size(); i++) {
       if (fields[i]->hasVariableLength()) {
         return true;
       }
@@ -243,10 +174,9 @@ namespace orc {
     return false;
   }
 
-  ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool
-                   ): ColumnVectorBatch(cap, pool),
-                      offsets(pool, cap+1) {
-    // PASS
+  ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool), offsets(pool, cap + 1) {
+    offsets.zeroOut();
   }
 
   ListVectorBatch::~ListVectorBatch() {
@@ -255,8 +185,8 @@ namespace orc {
 
   std::string ListVectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "List vector <" << elements->toString() << " with "
-           << numElements << " of " << capacity << ">";
+    buffer << "List vector <" << elements->toString() << " with " << numElements << " of "
+           << capacity << ">";
     return buffer.str();
   }
 
@@ -273,19 +203,17 @@ namespace orc {
   }
 
   uint64_t ListVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-           + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
-           + elements->getMemoryUsage();
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) + elements->getMemoryUsage();
   }
 
   bool ListVectorBatch::hasVariableLength() {
     return true;
   }
 
-  MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool
-                 ): ColumnVectorBatch(cap, pool),
-                    offsets(pool, cap+1) {
-    // PASS
+  MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool), offsets(pool, cap + 1) {
+    offsets.zeroOut();
   }
 
   MapVectorBatch::~MapVectorBatch() {
@@ -294,9 +222,9 @@ namespace orc {
 
   std::string MapVectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "Map vector <" << (keys ? keys->toString(): "key not selected") << ", "
-           << (elements ? elements->toString(): "value not selected")  << " with "
-           << numElements << " of " << capacity << ">";
+    buffer << "Map vector <" << (keys ? keys->toString() : "key not selected") << ", "
+           << (elements ? elements->toString() : "value not selected") << " with " << numElements
+           << " of " << capacity << ">";
     return buffer.str();
   }
 
@@ -314,25 +242,23 @@ namespace orc {
   }
 
   uint64_t MapVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-           + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
-           + (keys ? keys->getMemoryUsage() : 0)
-           + (elements ? elements->getMemoryUsage() : 0);
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) +
+           (keys ? keys->getMemoryUsage() : 0) + (elements ? elements->getMemoryUsage() : 0);
   }
 
   bool MapVectorBatch::hasVariableLength() {
     return true;
   }
 
-  UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool
-                                     ): ColumnVectorBatch(cap, pool),
-                                        tags(pool, cap),
-                                        offsets(pool, cap) {
-    // PASS
+  UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool), tags(pool, cap), offsets(pool, cap) {
+    tags.zeroOut();
+    offsets.zeroOut();
   }
 
   UnionVectorBatch::~UnionVectorBatch() {
-    for (uint64_t i=0; i < children.size(); i++) {
+    for (uint64_t i = 0; i < children.size(); i++) {
       delete children[i];
     }
   }
@@ -340,7 +266,7 @@ namespace orc {
   std::string UnionVectorBatch::toString() const {
     std::ostringstream buffer;
     buffer << "Union vector <";
-    for(size_t i=0; i < children.size(); ++i) {
+    for (size_t i = 0; i < children.size(); ++i) {
       if (i != 0) {
         buffer << ", ";
       }
@@ -359,24 +285,24 @@ namespace orc {
   }
 
   void UnionVectorBatch::clear() {
-    for(size_t i=0; i < children.size(); i++) {
+    for (size_t i = 0; i < children.size(); i++) {
       children[i]->clear();
     }
     numElements = 0;
   }
 
   uint64_t UnionVectorBatch::getMemoryUsage() {
-    uint64_t memory = ColumnVectorBatch::getMemoryUsage()
-               + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char)
-               + offsets.capacity() * sizeof(uint64_t));
-    for(size_t i=0; i < children.size(); ++i) {
+    uint64_t memory = ColumnVectorBatch::getMemoryUsage() +
+                      static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char) +
+                                            offsets.capacity() * sizeof(uint64_t));
+    for (size_t i = 0; i < children.size(); ++i) {
       memory += children[i]->getMemoryUsage();
     }
     return memory;
   }
 
   bool UnionVectorBatch::hasVariableLength() {
-    for(size_t i=0; i < children.size(); ++i) {
+    for (size_t i = 0; i < children.size(); ++i) {
       if (children[i]->hasVariableLength()) {
         return true;
       }
@@ -384,12 +310,12 @@ namespace orc {
     return false;
   }
 
-  Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool
-                 ): ColumnVectorBatch(cap, pool),
-                    precision(0),
-                    scale(0),
-                    values(pool, cap),
-                    readScales(pool, cap) {
+  Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool),
+        precision(0),
+        scale(0),
+        values(pool, cap),
+        readScales(pool, cap) {
     // PASS
   }
 
@@ -399,8 +325,7 @@ namespace orc {
 
   std::string Decimal64VectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "Decimal64 vector  with "
-           << numElements << " of " << capacity << ">";
+    buffer << "Decimal64 vector  with " << numElements << " of " << capacity << ">";
     return buffer.str();
   }
 
@@ -417,17 +342,16 @@ namespace orc {
   }
 
   uint64_t Decimal64VectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-          + static_cast<uint64_t>(
-              (values.capacity() + readScales.capacity()) * sizeof(int64_t));
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>((values.capacity() + readScales.capacity()) * sizeof(int64_t));
   }
 
-  Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool
-               ): ColumnVectorBatch(cap, pool),
-                  precision(0),
-                  scale(0),
-                  values(pool, cap),
-                  readScales(pool, cap) {
+  Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool)
+      : ColumnVectorBatch(cap, pool),
+        precision(0),
+        scale(0),
+        values(pool, cap),
+        readScales(pool, cap) {
     // PASS
   }
 
@@ -437,8 +361,7 @@ namespace orc {
 
   std::string Decimal128VectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "Decimal128 vector  with "
-           << numElements << " of " << capacity << ">";
+    buffer << "Decimal128 vector  with " << numElements << " of " << capacity << ">";
     return buffer.str();
   }
 
@@ -455,23 +378,22 @@ namespace orc {
   }
 
   uint64_t Decimal128VectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-          + static_cast<uint64_t>(values.capacity() * sizeof(Int128)
-          + readScales.capacity() * sizeof(int64_t));
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>(values.capacity() * sizeof(Int128) +
+                                 readScales.capacity() * sizeof(int64_t));
   }
 
-  Decimal::Decimal(const Int128& _value,
-                   int32_t _scale): value(_value), scale(_scale) {
+  Decimal::Decimal(const Int128& _value, int32_t _scale) : value(_value), scale(_scale) {
     // PASS
   }
 
   Decimal::Decimal(const std::string& str) {
     std::size_t foundPoint = str.find(".");
     // no decimal point, it is int
-    if(foundPoint == std::string::npos){
+    if (foundPoint == std::string::npos) {
       value = Int128(str);
       scale = 0;
-    }else{
+    } else {
       std::string copy(str);
       scale = static_cast<int32_t>(str.length() - foundPoint - 1);
       value = Int128(copy.replace(foundPoint, 1, ""));
@@ -486,12 +408,8 @@ namespace orc {
     return value.toDecimalString(scale, trimTrailingZeros);
   }
 
-  TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity,
-                                             MemoryPool& pool
-                                             ): ColumnVectorBatch(_capacity,
-                                                                  pool),
-                                                data(pool, _capacity),
-                                                nanoseconds(pool, _capacity) {
+  TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity, MemoryPool& pool)
+      : ColumnVectorBatch(_capacity, pool), data(pool, _capacity), nanoseconds(pool, _capacity) {
     // PASS
   }
 
@@ -518,8 +436,7 @@ namespace orc {
   }
 
   uint64_t TimestampVectorBatch::getMemoryUsage() {
-    return ColumnVectorBatch::getMemoryUsage()
-          + static_cast<uint64_t>(
-              (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t));
+    return ColumnVectorBatch::getMemoryUsage() +
+           static_cast<uint64_t>((data.capacity() + nanoseconds.capacity()) * sizeof(int64_t));
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Writer.cc b/contrib/libs/apache/orc/c++/src/Writer.cc
index 8a7d10ba81..19b71190a3 100644
--- a/contrib/libs/apache/orc/c++/src/Writer.cc
+++ b/contrib/libs/apache/orc/c++/src/Writer.cc
@@ -21,6 +21,7 @@
 
 #include "ColumnWriter.hh"
 #include "Timezone.hh"
+#include "Utils.hh"
 
 #include <memory>
 
@@ -42,37 +43,41 @@ namespace orc {
     double bloomFilterFalsePositiveProb;
     BloomFilterVersion bloomFilterVersion;
     std::string timezone;
+    WriterMetrics* metrics;
+    bool useTightNumericVector;
+    uint64_t outputBufferCapacity;
 
-    WriterOptionsPrivate() :
-                            fileVersion(FileVersion::v_0_12()) { // default to Hive_0_12
-      stripeSize = 64 * 1024 * 1024; // 64M
-      compressionBlockSize = 64 * 1024; // 64K
+    WriterOptionsPrivate() : fileVersion(FileVersion::v_0_12()) {  // default to Hive_0_12
+      stripeSize = 64 * 1024 * 1024;                               // 64M
+      compressionBlockSize = 64 * 1024;                            // 64K
       rowIndexStride = 10000;
-      compression = CompressionKind_ZLIB;
+      compression = CompressionKind_ZSTD;
       compressionStrategy = CompressionStrategy_SPEED;
       memoryPool = getDefaultPool();
       paddingTolerance = 0.0;
       errorStream = &std::cerr;
       dictionaryKeySizeThreshold = 0.0;
       enableIndex = true;
-      bloomFilterFalsePositiveProb = 0.05;
+      bloomFilterFalsePositiveProb = 0.01;
       bloomFilterVersion = UTF8;
-      //Writer timezone uses "GMT" by default to get rid of potential issues
-      //introduced by moving timestamps between different timezones.
-      //Explictly set the writer timezone if the use case depends on it.
+      // Writer timezone uses "GMT" by default to get rid of potential issues
+      // introduced by moving timestamps between different timezones.
+      // Explictly set the writer timezone if the use case depends on it.
       timezone = "GMT";
+      metrics = nullptr;
+      useTightNumericVector = false;
+      outputBufferCapacity = 1024 * 1024;
     }
   };
 
-  WriterOptions::WriterOptions():
-    privateBits(std::unique_ptr<WriterOptionsPrivate>
-                (new WriterOptionsPrivate())) {
+  WriterOptions::WriterOptions()
+      : privateBits(std::unique_ptr<WriterOptionsPrivate>(new WriterOptionsPrivate())) {
     // PASS
   }
 
-  WriterOptions::WriterOptions(const WriterOptions& rhs):
-    privateBits(std::unique_ptr<WriterOptionsPrivate>
-                (new WriterOptionsPrivate(*(rhs.privateBits.get())))) {
+  WriterOptions::WriterOptions(const WriterOptions& rhs)
+      : privateBits(std::unique_ptr<WriterOptionsPrivate>(
+            new WriterOptionsPrivate(*(rhs.privateBits.get())))) {
     // PASS
   }
 
@@ -92,8 +97,7 @@ namespace orc {
     // PASS
   }
   RleVersion WriterOptions::getRleVersion() const {
-    if(privateBits->fileVersion == FileVersion::v_0_11())
-    {
+    if (privateBits->fileVersion == FileVersion::v_0_11()) {
       return RleVersion_1;
     }
 
@@ -110,6 +114,9 @@ namespace orc {
   }
 
   WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) {
+    if (size >= (1 << 23)) {
+      throw std::invalid_argument("Compression block size cannot be greater or equal than 8M");
+    }
     privateBits->compressionBlockSize = size;
     return *this;
   }
@@ -167,8 +174,7 @@ namespace orc {
     return privateBits->compression;
   }
 
-  WriterOptions& WriterOptions::setCompressionStrategy(
-    CompressionStrategy strategy) {
+  WriterOptions& WriterOptions::setCompressionStrategy(CompressionStrategy strategy) {
     privateBits->compressionStrategy = strategy;
     return *this;
   }
@@ -216,8 +222,7 @@ namespace orc {
     return privateBits->dictionaryKeySizeThreshold > 0.0;
   }
 
-  WriterOptions& WriterOptions::setColumnsUseBloomFilter(
-    const std::set<uint64_t>& columns) {
+  WriterOptions& WriterOptions::setColumnsUseBloomFilter(const std::set<uint64_t>& columns) {
     privateBits->columnsUseBloomFilter = columns;
     return *this;
   }
@@ -255,12 +260,39 @@ namespace orc {
     return *this;
   }
 
+  WriterMetrics* WriterOptions::getWriterMetrics() const {
+    return privateBits->metrics;
+  }
+
+  WriterOptions& WriterOptions::setWriterMetrics(WriterMetrics* metrics) {
+    privateBits->metrics = metrics;
+    return *this;
+  }
+
+  WriterOptions& WriterOptions::setUseTightNumericVector(bool useTightNumericVector) {
+    privateBits->useTightNumericVector = useTightNumericVector;
+    return *this;
+  }
+
+  bool WriterOptions::getUseTightNumericVector() const {
+    return privateBits->useTightNumericVector;
+  }
+
+  WriterOptions& WriterOptions::setOutputBufferCapacity(uint64_t capacity) {
+    privateBits->outputBufferCapacity = capacity;
+    return *this;
+  }
+
+  uint64_t WriterOptions::getOutputBufferCapacity() const {
+    return privateBits->outputBufferCapacity;
+  }
+
   Writer::~Writer() {
     // PASS
   }
 
   class WriterImpl : public Writer {
-  private:
+   private:
     std::unique_ptr<ColumnWriter> columnWriter;
     std::unique_ptr<BufferedOutputStream> compressionStream;
     std::unique_ptr<BufferedOutputStream> bufferedStream;
@@ -277,23 +309,24 @@ namespace orc {
 
     static const char* magicId;
     static const WriterId writerId;
+    bool useTightNumericVector;
+    int32_t stripesAtLastFlush;
+    uint64_t lastFlushOffset;
 
-  public:
-    WriterImpl(
-               const Type& type,
-               OutputStream* stream,
-               const WriterOptions& options);
+   public:
+    WriterImpl(const Type& type, OutputStream* stream, const WriterOptions& options);
 
-    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size)
-                                                            const override;
+    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) const override;
 
     void add(ColumnVectorBatch& rowsToAdd) override;
 
     void close() override;
 
-    void addUserMetadata(const std::string name, const std::string value) override;
+    void addUserMetadata(const std::string& name, const std::string& value) override;
 
-  private:
+    uint64_t writeIntermediateFooter() override;
+
+   private:
     void init();
     void initStripe();
     void writeStripe();
@@ -301,48 +334,41 @@ namespace orc {
     void writeFileFooter();
     void writePostscript();
     void buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index);
-    static proto::CompressionKind convertCompressionKind(
-                                                  const CompressionKind& kind);
+    static proto::CompressionKind convertCompressionKind(const CompressionKind& kind);
   };
 
-  const char * WriterImpl::magicId = "ORC";
+  const char* WriterImpl::magicId = "ORC";
 
   const WriterId WriterImpl::writerId = WriterId::ORC_CPP_WRITER;
 
-  WriterImpl::WriterImpl(
-                         const Type& t,
-                         OutputStream* stream,
-                         const WriterOptions& opts) :
-                         outStream(stream),
-                         options(opts),
-                         type(t) {
+  WriterImpl::WriterImpl(const Type& t, OutputStream* stream, const WriterOptions& opts)
+      : outStream(stream), options(opts), type(t) {
     streamsFactory = createStreamsFactory(options, outStream);
     columnWriter = buildWriter(type, *streamsFactory, options);
     stripeRows = totalRows = indexRows = 0;
     currentOffset = 0;
+    stripesAtLastFlush = 0;
+    lastFlushOffset = 0;
+
+    useTightNumericVector = opts.getUseTightNumericVector();
 
     // compression stream for stripe footer, file footer and metadata
-    compressionStream = createCompressor(
-                                  options.getCompression(),
-                                  outStream,
-                                  options.getCompressionStrategy(),
-                                  1 * 1024 * 1024, // buffer capacity: 1M
-                                  options.getCompressionBlockSize(),
-                                  *options.getMemoryPool());
+    compressionStream =
+        createCompressor(options.getCompression(), outStream, options.getCompressionStrategy(),
+                         options.getOutputBufferCapacity(), options.getCompressionBlockSize(),
+                         *options.getMemoryPool(), options.getWriterMetrics());
 
     // uncompressed stream for post script
-    bufferedStream.reset(new BufferedOutputStream(
-                                            *options.getMemoryPool(),
-                                            outStream,
-                                            1024, // buffer capacity: 1024 bytes
-                                            options.getCompressionBlockSize()));
+    bufferedStream.reset(new BufferedOutputStream(*options.getMemoryPool(), outStream,
+                                                  1024,  // buffer capacity: 1024 bytes
+                                                  options.getCompressionBlockSize(),
+                                                  options.getWriterMetrics()));
 
     init();
   }
 
-  std::unique_ptr<ColumnVectorBatch> WriterImpl::createRowBatch(uint64_t size)
-                                                                         const {
-    return type.createRowBatch(size, *options.getMemoryPool());
+  std::unique_ptr<ColumnVectorBatch> WriterImpl::createRowBatch(uint64_t size) const {
+    return type.createRowBatch(size, *options.getMemoryPool(), false, useTightNumericVector);
   }
 
   void WriterImpl::add(ColumnVectorBatch& rowsToAdd) {
@@ -351,8 +377,7 @@ namespace orc {
       uint64_t chunkSize = 0;
       uint64_t rowIndexStride = options.getRowIndexStride();
       while (pos < rowsToAdd.numElements) {
-        chunkSize = std::min(rowsToAdd.numElements - pos,
-                             rowIndexStride - indexRows);
+        chunkSize = std::min(rowsToAdd.numElements - pos, rowIndexStride - indexRows);
         columnWriter->add(rowsToAdd, pos, chunkSize, nullptr);
 
         pos += chunkSize;
@@ -384,7 +409,25 @@ namespace orc {
     outStream->close();
   }
 
-  void WriterImpl::addUserMetadata(const std::string name, const std::string value){
+  uint64_t WriterImpl::writeIntermediateFooter() {
+    if (stripeRows > 0) {
+      writeStripe();
+    }
+    if (stripesAtLastFlush != fileFooter.stripes_size()) {
+      writeMetadata();
+      writeFileFooter();
+      writePostscript();
+      stripesAtLastFlush = fileFooter.stripes_size();
+      outStream->flush();
+      lastFlushOffset = outStream->getLength();
+      currentOffset = lastFlushOffset;
+      // init stripe now that we adjusted the currentOffset
+      initStripe();
+    }
+    return lastFlushOffset;
+  }
+
+  void WriterImpl::addUserMetadata(const std::string& name, const std::string& value) {
     proto::UserMetadataItem* userMetadataItem = fileFooter.add_metadata();
     userMetadataItem->set_name(TString(name));
     userMetadataItem->set_value(TString(value));
@@ -393,31 +436,32 @@ namespace orc {
   void WriterImpl::init() {
     // Write file header
     const static size_t magicIdLength = strlen(WriterImpl::magicId);
-    outStream->write(WriterImpl::magicId, magicIdLength);
+    {
+      SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount);
+      outStream->write(WriterImpl::magicId, magicIdLength);
+    }
     currentOffset += magicIdLength;
 
     // Initialize file footer
-    fileFooter.set_headerlength(currentOffset);
-    fileFooter.set_contentlength(0);
-    fileFooter.set_numberofrows(0);
-    fileFooter.set_rowindexstride(
-                          static_cast<uint32_t>(options.getRowIndexStride()));
+    fileFooter.set_header_length(currentOffset);
+    fileFooter.set_content_length(0);
+    fileFooter.set_number_of_rows(0);
+    fileFooter.set_row_index_stride(static_cast<uint32_t>(options.getRowIndexStride()));
     fileFooter.set_writer(writerId);
-    fileFooter.set_softwareversion(ORC_VERSION);
+    fileFooter.set_software_version(ORC_VERSION);
 
     uint32_t index = 0;
     buildFooterType(type, fileFooter, index);
 
     // Initialize post script
-    postScript.set_footerlength(0);
-    postScript.set_compression(
-                  WriterImpl::convertCompressionKind(options.getCompression()));
-    postScript.set_compressionblocksize(options.getCompressionBlockSize());
+    postScript.set_footer_length(0);
+    postScript.set_compression(WriterImpl::convertCompressionKind(options.getCompression()));
+    postScript.set_compression_block_size(options.getCompressionBlockSize());
 
     postScript.add_version(options.getFileVersion().getMajor());
     postScript.add_version(options.getFileVersion().getMinor());
 
-    postScript.set_writerversion(WriterVersion_ORC_135);
+    postScript.set_writer_version(WriterVersion_ORC_135);
     postScript.set_magic("ORC");
 
     // Initialize first stripe
@@ -426,10 +470,10 @@ namespace orc {
 
   void WriterImpl::initStripe() {
     stripeInfo.set_offset(currentOffset);
-    stripeInfo.set_indexlength(0);
-    stripeInfo.set_datalength(0);
-    stripeInfo.set_footerlength(0);
-    stripeInfo.set_numberofrows(0);
+    stripeInfo.set_index_length(0);
+    stripeInfo.set_data_length(0);
+    stripeInfo.set_footer_length(0);
+    stripeInfo.set_number_of_rows(0);
 
     stripeRows = indexRows = 0;
   }
@@ -466,14 +510,14 @@ namespace orc {
       *stripeFooter.add_columns() = encodings[i];
     }
 
-    stripeFooter.set_writertimezone(TString(options.getTimezoneName()));
+    stripeFooter.set_writer_timezone(TString(options.getTimezoneName()));
 
     // add stripe statistics to metadata
-    proto::StripeStatistics* stripeStats = metadata.add_stripestats();
+    proto::StripeStatistics* stripeStats = metadata.add_stripe_stats();
     std::vector<proto::ColumnStatistics> colStats;
     columnWriter->getStripeStatistics(colStats);
     for (uint32_t i = 0; i != colStats.size(); ++i) {
-      *stripeStats->add_colstats() = colStats[i];
+      *stripeStats->add_col_stats() = colStats[i];
     }
     // merge stripe stats into file stats and clear stripe stats
     columnWriter->mergeStripeStatsIntoFileStats();
@@ -496,10 +540,10 @@ namespace orc {
     }
 
     // update stripe info
-    stripeInfo.set_indexlength(indexLength);
-    stripeInfo.set_datalength(dataLength);
-    stripeInfo.set_footerlength(footerLength);
-    stripeInfo.set_numberofrows(stripeRows);
+    stripeInfo.set_index_length(indexLength);
+    stripeInfo.set_data_length(dataLength);
+    stripeInfo.set_footer_length(footerLength);
+    stripeInfo.set_number_of_rows(stripeRows);
 
     *fileFooter.add_stripes() = stripeInfo;
 
@@ -515,16 +559,17 @@ namespace orc {
     if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) {
       throw std::logic_error("Failed to write metadata.");
     }
-    postScript.set_metadatalength(compressionStream.get()->flush());
+    postScript.set_metadata_length(compressionStream.get()->flush());
   }
 
   void WriterImpl::writeFileFooter() {
-    fileFooter.set_contentlength(currentOffset - fileFooter.headerlength());
-    fileFooter.set_numberofrows(totalRows);
+    fileFooter.set_content_length(currentOffset - fileFooter.header_length());
+    fileFooter.set_number_of_rows(totalRows);
 
     // update file statistics
     std::vector<proto::ColumnStatistics> colStats;
     columnWriter->getFileStatistics(colStats);
+    fileFooter.clear_statistics();
     for (uint32_t i = 0; i != colStats.size(); ++i) {
       *fileFooter.add_statistics() = colStats[i];
     }
@@ -532,106 +577,103 @@ namespace orc {
     if (!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) {
       throw std::logic_error("Failed to write file footer.");
     }
-    postScript.set_footerlength(compressionStream->flush());
+    postScript.set_footer_length(compressionStream->flush());
   }
 
   void WriterImpl::writePostscript() {
     if (!postScript.SerializeToZeroCopyStream(bufferedStream.get())) {
       throw std::logic_error("Failed to write post script.");
     }
-    unsigned char psLength =
-                      static_cast<unsigned char>(bufferedStream->flush());
+    unsigned char psLength = static_cast<unsigned char>(bufferedStream->flush());
+    SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount);
     outStream->write(&psLength, sizeof(unsigned char));
   }
 
-  void WriterImpl::buildFooterType(
-                                   const Type& t,
-                                   proto::Footer& footer,
-                                   uint32_t & index) {
+  void WriterImpl::buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index) {
     proto::Type protoType;
-    protoType.set_maximumlength(static_cast<uint32_t>(t.getMaximumLength()));
+    protoType.set_maximum_length(static_cast<uint32_t>(t.getMaximumLength()));
     protoType.set_precision(static_cast<uint32_t>(t.getPrecision()));
     protoType.set_scale(static_cast<uint32_t>(t.getScale()));
 
     switch (t.getKind()) {
-    case BOOLEAN: {
-      protoType.set_kind(proto::Type_Kind_BOOLEAN);
-      break;
-    }
-    case BYTE: {
-      protoType.set_kind(proto::Type_Kind_BYTE);
-      break;
-    }
-    case SHORT: {
-      protoType.set_kind(proto::Type_Kind_SHORT);
-      break;
-    }
-    case INT: {
-      protoType.set_kind(proto::Type_Kind_INT);
-      break;
-    }
-    case LONG: {
-      protoType.set_kind(proto::Type_Kind_LONG);
-      break;
-    }
-    case FLOAT: {
-      protoType.set_kind(proto::Type_Kind_FLOAT);
-      break;
-    }
-    case DOUBLE: {
-      protoType.set_kind(proto::Type_Kind_DOUBLE);
-      break;
-    }
-    case STRING: {
-      protoType.set_kind(proto::Type_Kind_STRING);
-      break;
-    }
-    case BINARY: {
-      protoType.set_kind(proto::Type_Kind_BINARY);
-      break;
-    }
-    case TIMESTAMP: {
-      protoType.set_kind(proto::Type_Kind_TIMESTAMP);
-      break;
-    }
-    case TIMESTAMP_INSTANT: {
-      protoType.set_kind(proto::Type_Kind_TIMESTAMP_INSTANT);
-      break;
-    }
-    case LIST: {
-      protoType.set_kind(proto::Type_Kind_LIST);
-      break;
-    }
-    case MAP: {
-      protoType.set_kind(proto::Type_Kind_MAP);
-      break;
-    }
-    case STRUCT: {
-      protoType.set_kind(proto::Type_Kind_STRUCT);
-      break;
-    }
-    case UNION: {
-      protoType.set_kind(proto::Type_Kind_UNION);
-      break;
-    }
-    case DECIMAL: {
-      protoType.set_kind(proto::Type_Kind_DECIMAL);
-      break;
-    }
-    case DATE: {
-      protoType.set_kind(proto::Type_Kind_DATE);
-      break;
-    }
-    case VARCHAR: {
-      protoType.set_kind(proto::Type_Kind_VARCHAR);
-      break;
-    }
-    case CHAR: {
-      protoType.set_kind(proto::Type_Kind_CHAR);
-      break;
-    }
-    default:
-      throw std::logic_error("Unknown type.");
+      case BOOLEAN: {
+        protoType.set_kind(proto::Type_Kind_BOOLEAN);
+        break;
+      }
+      case BYTE: {
+        protoType.set_kind(proto::Type_Kind_BYTE);
+        break;
+      }
+      case SHORT: {
+        protoType.set_kind(proto::Type_Kind_SHORT);
+        break;
+      }
+      case INT: {
+        protoType.set_kind(proto::Type_Kind_INT);
+        break;
+      }
+      case LONG: {
+        protoType.set_kind(proto::Type_Kind_LONG);
+        break;
+      }
+      case FLOAT: {
+        protoType.set_kind(proto::Type_Kind_FLOAT);
+        break;
+      }
+      case DOUBLE: {
+        protoType.set_kind(proto::Type_Kind_DOUBLE);
+        break;
+      }
+      case STRING: {
+        protoType.set_kind(proto::Type_Kind_STRING);
+        break;
+      }
+      case BINARY: {
+        protoType.set_kind(proto::Type_Kind_BINARY);
+        break;
+      }
+      case TIMESTAMP: {
+        protoType.set_kind(proto::Type_Kind_TIMESTAMP);
+        break;
+      }
+      case TIMESTAMP_INSTANT: {
+        protoType.set_kind(proto::Type_Kind_TIMESTAMP_INSTANT);
+        break;
+      }
+      case LIST: {
+        protoType.set_kind(proto::Type_Kind_LIST);
+        break;
+      }
+      case MAP: {
+        protoType.set_kind(proto::Type_Kind_MAP);
+        break;
+      }
+      case STRUCT: {
+        protoType.set_kind(proto::Type_Kind_STRUCT);
+        break;
+      }
+      case UNION: {
+        protoType.set_kind(proto::Type_Kind_UNION);
+        break;
+      }
+      case DECIMAL: {
+        protoType.set_kind(proto::Type_Kind_DECIMAL);
+        break;
+      }
+      case DATE: {
+        protoType.set_kind(proto::Type_Kind_DATE);
+        break;
+      }
+      case VARCHAR: {
+        protoType.set_kind(proto::Type_Kind_VARCHAR);
+        break;
+      }
+      case CHAR: {
+        protoType.set_kind(proto::Type_Kind_CHAR);
+        break;
+      }
+      default:
+        throw std::logic_error("Unknown type.");
     }
 
     for (auto& key : t.getAttributeKeys()) {
@@ -647,28 +689,20 @@ namespace orc {
     for (uint64_t i = 0; i < t.getSubtypeCount(); ++i) {
       // only add subtypes' field names if this type is STRUCT
       if (t.getKind() == STRUCT) {
-        footer.mutable_types(pos)->add_fieldnames(TString(t.getFieldName(i)));
+        footer.mutable_types(pos)->add_field_names(TString(t.getFieldName(i)));
       }
       footer.mutable_types(pos)->add_subtypes(++index);
       buildFooterType(*t.getSubtype(i), footer, index);
     }
   }
 
-  proto::CompressionKind WriterImpl::convertCompressionKind(
-                                      const CompressionKind& kind) {
+  proto::CompressionKind WriterImpl::convertCompressionKind(const CompressionKind& kind) {
     return static_cast<proto::CompressionKind>(kind);
   }
 
-  std::unique_ptr<Writer> createWriter(
-                                       const Type& type,
-                                       OutputStream* stream,
+  std::unique_ptr<Writer> createWriter(const Type& type, OutputStream* stream,
                                        const WriterOptions& options) {
-    return std::unique_ptr<Writer>(
-                                   new WriterImpl(
-                                            type,
-                                            stream,
-                                            options));
+    return std::unique_ptr<Writer>(new WriterImpl(type, stream, options));
   }
 
-}
-
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/io/InputStream.cc b/contrib/libs/apache/orc/c++/src/io/InputStream.cc
index ec798d4ed7..3bf1781747 100644
--- a/contrib/libs/apache/orc/c++/src/io/InputStream.cc
+++ b/contrib/libs/apache/orc/c++/src/io/InputStream.cc
@@ -16,26 +16,22 @@
  * limitations under the License.
  */
 
-#include "orc/Exceptions.hh"
 #include "InputStream.hh"
+#include "orc/Exceptions.hh"
 
 #include <algorithm>
 #include <iomanip>
 
 namespace orc {
 
-  void printBuffer(std::ostream& out,
-                   const char *buffer,
-                   uint64_t length) {
+  void printBuffer(std::ostream& out, const char* buffer, uint64_t length) {
     const uint64_t width = 24;
     out << std::hex;
-    for(uint64_t line = 0; line < (length + width - 1) / width; ++line) {
+    for (uint64_t line = 0; line < (length + width - 1) / width; ++line) {
       out << std::setfill('0') << std::setw(7) << (line * width);
-      for(uint64_t byte = 0;
-          byte < width && line * width + byte < length; ++byte) {
+      for (uint64_t byte = 0; byte < width && line * width + byte < length; ++byte) {
         out << " " << std::setfill('0') << std::setw(2)
-            << static_cast<uint64_t>(0xff & buffer[line * width +
-                                                   byte]);
+            << static_cast<uint64_t>(0xff & buffer[line * width + byte]);
       }
       out << "\n";
     }
@@ -64,26 +60,23 @@ namespace orc {
     // PASS
   }
 
-  SeekableArrayInputStream::SeekableArrayInputStream
-               (const unsigned char* values,
-                uint64_t size,
-                uint64_t blkSize
-               ): data(reinterpret_cast<const char*>(values)) {
+  SeekableArrayInputStream::SeekableArrayInputStream(const unsigned char* values, uint64_t size,
+                                                     uint64_t blkSize)
+      : data(reinterpret_cast<const char*>(values)) {
     length = size;
     position = 0;
     blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize);
   }
 
-  SeekableArrayInputStream::SeekableArrayInputStream(const char* values,
-                                                     uint64_t size,
-                                                     uint64_t blkSize
-  ): data(values) {
+  SeekableArrayInputStream::SeekableArrayInputStream(const char* values, uint64_t size,
+                                                     uint64_t blkSize)
+      : data(values) {
     length = size;
     position = 0;
     blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize);
   }
 
-  bool SeekableArrayInputStream::Next(const void** buffer, int*size) {
+  bool SeekableArrayInputStream::Next(const void** buffer, int* size) {
     uint64_t currentSize = std::min(length - position, blockSize);
     if (currentSize > 0) {
       *buffer = data + position;
@@ -137,19 +130,14 @@ namespace orc {
     return std::min(length, request == 0 ? 256 * 1024 : request);
   }
 
-  SeekableFileInputStream::SeekableFileInputStream(InputStream* stream,
-                                                   uint64_t offset,
-                                                   uint64_t byteCount,
-                                                   MemoryPool& _pool,
-                                                   uint64_t _blockSize
-                                                   ):pool(_pool),
-                                                     input(stream),
-                                                     start(offset),
-                                                     length(byteCount),
-                                                     blockSize(computeBlock
-                                                               (_blockSize,
-                                                                length)) {
-
+  SeekableFileInputStream::SeekableFileInputStream(InputStream* stream, uint64_t offset,
+                                                   uint64_t byteCount, MemoryPool& _pool,
+                                                   uint64_t _blockSize)
+      : pool(_pool),
+        input(stream),
+        start(offset),
+        length(byteCount),
+        blockSize(computeBlock(_blockSize, length)) {
     position = 0;
     buffer.reset(new DataBuffer<char>(pool));
     pushBack = 0;
@@ -159,7 +147,7 @@ namespace orc {
     // PASS
   }
 
-  bool SeekableFileInputStream::Next(const void** data, int*size) {
+  bool SeekableFileInputStream::Next(const void** data, int* size) {
     uint64_t bytesRead;
     if (pushBack != 0) {
       *data = buffer->data() + (buffer->size() - pushBack);
@@ -168,7 +156,7 @@ namespace orc {
       bytesRead = std::min(length - position, blockSize);
       buffer->resize(bytesRead);
       if (bytesRead > 0) {
-        input->read(buffer->data(), bytesRead, start+position);
+        input->read(buffer->data(), bytesRead, start + position);
         *data = static_cast<void*>(buffer->data());
       }
     }
@@ -218,9 +206,8 @@ namespace orc {
 
   std::string SeekableFileInputStream::getName() const {
     std::ostringstream result;
-    result << input->getName() << " from " << start << " for "
-           << length;
+    result << input->getName() << " from " << start << " for " << length;
     return result.str();
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/io/InputStream.hh b/contrib/libs/apache/orc/c++/src/io/InputStream.hh
index ab7ecedb44..33c64f8809 100644
--- a/contrib/libs/apache/orc/c++/src/io/InputStream.hh
+++ b/contrib/libs/apache/orc/c++/src/io/InputStream.hh
@@ -23,22 +23,21 @@
 #include "orc/OrcFile.hh"
 #include "wrap/zero-copy-stream-wrapper.h"
 
-#include <list>
 #include <fstream>
 #include <iostream>
+#include <list>
 #include <sstream>
 #include <vector>
 
 namespace orc {
 
-  void printBuffer(std::ostream& out,
-                   const char *buffer,
-                   uint64_t length);
+  void printBuffer(std::ostream& out, const char* buffer, uint64_t length);
 
   class PositionProvider {
-  private:
+   private:
     std::list<uint64_t>::const_iterator position;
-  public:
+
+   public:
     PositionProvider(const std::list<uint64_t>& positions);
     uint64_t next();
     uint64_t current();
@@ -49,9 +48,9 @@ namespace orc {
    * By extending Google's class, we get the ability to pass it directly
    * to the protobuf readers.
    */
-  class SeekableInputStream: public google::protobuf::io::ZeroCopyInputStream {
-  public:
-    virtual ~SeekableInputStream();
+  class SeekableInputStream : public google::protobuf::io::ZeroCopyInputStream {
+   public:
+    ~SeekableInputStream() override;
     virtual void seek(PositionProvider& position) = 0;
     virtual std::string getName() const = 0;
   };
@@ -59,22 +58,18 @@ namespace orc {
   /**
    * Create a seekable input stream based on a memory range.
    */
-  class SeekableArrayInputStream: public SeekableInputStream {
-  private:
+  class SeekableArrayInputStream : public SeekableInputStream {
+   private:
     const char* data;
     uint64_t length;
     uint64_t position;
     uint64_t blockSize;
 
-  public:
-    SeekableArrayInputStream(const unsigned char* list,
-                             uint64_t length,
-                             uint64_t block_size = 0);
-    SeekableArrayInputStream(const char* list,
-                             uint64_t length,
-                             uint64_t block_size = 0);
+   public:
+    SeekableArrayInputStream(const unsigned char* list, uint64_t length, uint64_t block_size = 0);
+    SeekableArrayInputStream(const char* list, uint64_t length, uint64_t block_size = 0);
     virtual ~SeekableArrayInputStream() override;
-    virtual bool Next(const void** data, int*size) override;
+    virtual bool Next(const void** data, int* size) override;
     virtual void BackUp(int count) override;
     virtual bool Skip(int count) override;
     virtual int64_t ByteCount() const override;
@@ -85,8 +80,8 @@ namespace orc {
   /**
    * Create a seekable input stream based on an input stream.
    */
-  class SeekableFileInputStream: public SeekableInputStream {
-  private:
+  class SeekableFileInputStream : public SeekableInputStream {
+   private:
     MemoryPool& pool;
     InputStream* const input;
     const uint64_t start;
@@ -96,15 +91,12 @@ namespace orc {
     uint64_t position;
     uint64_t pushBack;
 
-  public:
-    SeekableFileInputStream(InputStream* input,
-                            uint64_t offset,
-                            uint64_t byteCount,
-                            MemoryPool& pool,
-                            uint64_t blockSize = 0);
+   public:
+    SeekableFileInputStream(InputStream* input, uint64_t offset, uint64_t byteCount,
+                            MemoryPool& pool, uint64_t blockSize = 0);
     virtual ~SeekableFileInputStream() override;
 
-    virtual bool Next(const void** data, int*size) override;
+    virtual bool Next(const void** data, int* size) override;
     virtual void BackUp(int count) override;
     virtual bool Skip(int count) override;
     virtual int64_t ByteCount() const override;
@@ -112,6 +104,6 @@ namespace orc {
     virtual std::string getName() const override;
   };
 
-}
+}  // namespace orc
 
-#endif //ORC_INPUTSTREAM_HH
+#endif  // ORC_INPUTSTREAM_HH
diff --git a/contrib/libs/apache/orc/c++/src/io/OutputStream.cc b/contrib/libs/apache/orc/c++/src/io/OutputStream.cc
index 14d5e5e7c4..7d9fb92206 100644
--- a/contrib/libs/apache/orc/c++/src/io/OutputStream.cc
+++ b/contrib/libs/apache/orc/c++/src/io/OutputStream.cc
@@ -16,8 +16,9 @@
  * limitations under the License.
  */
 
-#include "orc/Exceptions.hh"
 #include "OutputStream.hh"
+#include "Utils.hh"
+#include "orc/Exceptions.hh"
 
 #include <sstream>
 
@@ -27,14 +28,11 @@ namespace orc {
     // PASS
   }
 
-  BufferedOutputStream::BufferedOutputStream(
-                                    MemoryPool& pool,
-                                    OutputStream * outStream,
-                                    uint64_t capacity_,
-                                    uint64_t blockSize_)
-                                    : outputStream(outStream),
-                                      blockSize(blockSize_) {
-    dataBuffer.reset(new DataBuffer<char>(pool));
+  BufferedOutputStream::BufferedOutputStream(MemoryPool& pool, OutputStream* outStream,
+                                             uint64_t capacity_, uint64_t blockSize_,
+                                             WriterMetrics* metrics_)
+      : outputStream(outStream), blockSize(blockSize_), metrics(metrics_) {
+    dataBuffer.reset(new BlockBuffer(pool, blockSize));
     dataBuffer->reserve(capacity_);
   }
 
@@ -43,16 +41,12 @@ namespace orc {
   }
 
   bool BufferedOutputStream::Next(void** buffer, int* size) {
-    *size = static_cast<int>(blockSize);
-    uint64_t oldSize = dataBuffer->size();
-    uint64_t newSize = oldSize + blockSize;
-    uint64_t newCapacity = dataBuffer->capacity();
-    while (newCapacity < newSize) {
-      newCapacity += dataBuffer->capacity();
+    auto block = dataBuffer->getNextBlock();
+    if (block.data == nullptr) {
+      throw std::logic_error("Failed to get next buffer from block buffer.");
     }
-    dataBuffer->reserve(newCapacity);
-    dataBuffer->resize(newSize);
-    *buffer = dataBuffer->data() + oldSize;
+    *buffer = block.data;
+    *size = static_cast<int>(block.size);
     return true;
   }
 
@@ -71,7 +65,7 @@ namespace orc {
     return static_cast<google::protobuf::int64>(dataBuffer->size());
   }
 
-  bool BufferedOutputStream::WriteAliasedRaw(const void *, int) {
+  bool BufferedOutputStream::WriteAliasedRaw(const void*, int) {
     throw NotImplementedYet("WriteAliasedRaw is not supported.");
   }
 
@@ -81,8 +75,7 @@ namespace orc {
 
   std::string BufferedOutputStream::getName() const {
     std::ostringstream result;
-    result << "BufferedOutputStream " << dataBuffer->size() << " of "
-                                              << dataBuffer->capacity();
+    result << "BufferedOutputStream " << dataBuffer->size() << " of " << dataBuffer->capacity();
     return result.str();
   }
 
@@ -92,7 +85,11 @@ namespace orc {
 
   uint64_t BufferedOutputStream::flush() {
     uint64_t dataSize = dataBuffer->size();
-    outputStream->write(dataBuffer->data(), dataSize);
+    // flush data buffer into outputStream
+    if (dataSize > 0) {
+      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount);
+      dataBuffer->writeTo(outputStream, metrics);
+    }
     dataBuffer->resize(0);
     return dataSize;
   }
@@ -101,20 +98,16 @@ namespace orc {
     dataBuffer->resize(0);
   }
 
-  void AppendOnlyBufferedStream::write(const char * data, size_t size) {
+  void AppendOnlyBufferedStream::write(const char* data, size_t size) {
     size_t dataOffset = 0;
     while (size > 0) {
       if (bufferOffset == bufferLength) {
-        if (!outStream->Next(
-                              reinterpret_cast<void **>(&buffer),
-                              &bufferLength)) {
+        if (!outStream->Next(reinterpret_cast<void**>(&buffer), &bufferLength)) {
           throw std::logic_error("Failed to allocate buffer.");
         }
         bufferOffset = 0;
       }
-      size_t len = std::min(
-                           static_cast<size_t>(bufferLength - bufferOffset),
-                           size);
+      size_t len = std::min(static_cast<size_t>(bufferLength - bufferOffset), size);
       memcpy(buffer + bufferOffset, data + dataOffset, len);
       bufferOffset += static_cast<int>(len);
       dataOffset += len;
@@ -148,4 +141,4 @@ namespace orc {
     }
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/io/OutputStream.hh b/contrib/libs/apache/orc/c++/src/io/OutputStream.hh
index 0fb92465e9..d8bc21ce6d 100644
--- a/contrib/libs/apache/orc/c++/src/io/OutputStream.hh
+++ b/contrib/libs/apache/orc/c++/src/io/OutputStream.hh
@@ -20,6 +20,7 @@
 #define ORC_OUTPUTSTREAM_HH
 
 #include "Adaptor.hh"
+#include "BlockBuffer.hh"
 #include "orc/OrcFile.hh"
 #include "wrap/zero-copy-stream-wrapper.h"
 
@@ -27,36 +28,41 @@ namespace orc {
 
   /**
    * Record write position for creating index stream
-  */
+   */
   class PositionRecorder {
-  public:
+   public:
     virtual ~PositionRecorder();
     virtual void add(uint64_t pos) = 0;
   };
 
+  DIAGNOSTIC_PUSH
+
+#ifdef __clang__
+  DIAGNOSTIC_IGNORE("-Wunused-private-field")
+#endif
+  struct WriterMetrics;
   /**
    * A subclass of Google's ZeroCopyOutputStream that supports output to memory
    * buffer, and flushing to OutputStream.
    * By extending Google's class, we get the ability to pass it directly
    * to the protobuf writers.
    */
-  class BufferedOutputStream: public google::protobuf::io::ZeroCopyOutputStream {
-  private:
-    OutputStream * outputStream;
-    std::unique_ptr<DataBuffer<char> > dataBuffer;
+  class BufferedOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
+   private:
+    OutputStream* outputStream;
+    std::unique_ptr<BlockBuffer> dataBuffer;
     uint64_t blockSize;
+    WriterMetrics* metrics;
 
-  public:
-    BufferedOutputStream(MemoryPool& pool,
-                      OutputStream * outStream,
-                      uint64_t capacity,
-                      uint64_t block_size);
+   public:
+    BufferedOutputStream(MemoryPool& pool, OutputStream* outStream, uint64_t capacity,
+                         uint64_t block_size, WriterMetrics* metrics);
     virtual ~BufferedOutputStream() override;
 
-    virtual bool Next(void** data, int*size) override;
+    virtual bool Next(void** data, int* size) override;
     virtual void BackUp(int count) override;
     virtual int64_t ByteCount() const override;
-    virtual bool WriteAliasedRaw(const void * data, int size) override;
+    virtual bool WriteAliasedRaw(const void* data, int size) override;
     virtual bool AllowsAliasing() const override;
 
     virtual std::string getName() const;
@@ -64,8 +70,11 @@ namespace orc {
     virtual uint64_t flush();
     virtual void suppress();
 
-    virtual bool isCompressed() const { return false; }
+    virtual bool isCompressed() const {
+      return false;
+    }
   };
+  DIAGNOSTIC_POP
 
   /**
    * An append only buffered stream that allows
@@ -74,24 +83,24 @@ namespace orc {
    * to the protobuf writers.
    */
   class AppendOnlyBufferedStream {
-  private:
+   private:
     std::unique_ptr<BufferedOutputStream> outStream;
-    char * buffer;
+    char* buffer;
     int bufferOffset, bufferLength;
 
-  public:
-    AppendOnlyBufferedStream(std::unique_ptr<BufferedOutputStream> _outStream) :
-                                              outStream(std::move(_outStream)) {
+   public:
+    AppendOnlyBufferedStream(std::unique_ptr<BufferedOutputStream> _outStream)
+        : outStream(std::move(_outStream)) {
       buffer = nullptr;
       bufferOffset = bufferLength = 0;
     }
 
-    void write(const char * data, size_t size);
+    void write(const char* data, size_t size);
     uint64_t getSize() const;
     uint64_t flush();
 
     void recordPosition(PositionRecorder* recorder) const;
   };
-}
+}  // namespace orc
 
-#endif // ORC_OUTPUTSTREAM_HH
+#endif  // ORC_OUTPUTSTREAM_HH
diff --git a/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.cc b/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.cc
index e7d87083d8..9176c1f6c3 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.cc
@@ -24,41 +24,28 @@
 namespace orc {
 
   ExpressionTree::ExpressionTree(Operator op)
-                                : mOperator(op)
-                                , mLeaf(UNUSED_LEAF)
-                                , mConstant(TruthValue::YES_NO_NULL) {
-  }
-
+      : mOperator(op), mLeaf(UNUSED_LEAF), mConstant(TruthValue::YES_NO_NULL) {}
 
-  ExpressionTree::ExpressionTree(Operator op,
-                                 std::initializer_list<TreeNode> children)
-                                : mOperator(op)
-                                , mChildren(children.begin(), children.end())
-                                , mLeaf(UNUSED_LEAF)
-                                , mConstant(TruthValue::YES_NO_NULL) {
+  ExpressionTree::ExpressionTree(Operator op, std::initializer_list<TreeNode> children)
+      : mOperator(op),
+        mChildren(children.begin(), children.end()),
+        mLeaf(UNUSED_LEAF),
+        mConstant(TruthValue::YES_NO_NULL) {
     // PASS
   }
 
   ExpressionTree::ExpressionTree(size_t leaf)
-                                : mOperator(Operator::LEAF)
-                                , mChildren()
-                                , mLeaf(leaf)
-                                , mConstant(TruthValue::YES_NO_NULL) {
+      : mOperator(Operator::LEAF), mChildren(), mLeaf(leaf), mConstant(TruthValue::YES_NO_NULL) {
     // PASS
   }
 
   ExpressionTree::ExpressionTree(TruthValue constant)
-                                : mOperator(Operator::CONSTANT)
-                                , mChildren()
-                                , mLeaf(UNUSED_LEAF)
-                                , mConstant(constant) {
+      : mOperator(Operator::CONSTANT), mChildren(), mLeaf(UNUSED_LEAF), mConstant(constant) {
     // PASS
   }
 
   ExpressionTree::ExpressionTree(const ExpressionTree& other)
-                                : mOperator(other.mOperator)
-                                , mLeaf(other.mLeaf)
-                                , mConstant(other.mConstant) {
+      : mOperator(other.mOperator), mLeaf(other.mLeaf), mConstant(other.mConstant) {
     for (TreeNode child : other.mChildren) {
       mChildren.emplace_back(std::make_shared<ExpressionTree>(*child));
     }
@@ -74,7 +61,7 @@ namespace orc {
 
   std::vector<TreeNode>& ExpressionTree::getChildren() {
     return const_cast<std::vector<TreeNode>&>(
-      const_cast<const ExpressionTree *>(this)->getChildren());
+        const_cast<const ExpressionTree*>(this)->getChildren());
   }
 
   const TreeNode ExpressionTree::getChild(size_t i) const {
@@ -83,7 +70,7 @@ namespace orc {
 
   TreeNode ExpressionTree::getChild(size_t i) {
     return std::const_pointer_cast<ExpressionTree>(
-      const_cast<const ExpressionTree *>(this)->getChild(i));
+        const_cast<const ExpressionTree*>(this)->getChild(i));
   }
 
   TruthValue ExpressionTree::getConstant() const {
@@ -105,20 +92,17 @@ namespace orc {
     mChildren.push_back(child);
   }
 
-  TruthValue ExpressionTree::evaluate(
-                                 const std::vector<TruthValue>& leaves) const {
+  TruthValue ExpressionTree::evaluate(const std::vector<TruthValue>& leaves) const {
     TruthValue result;
     switch (mOperator) {
-      case Operator::OR:
-      {
+      case Operator::OR: {
         result = mChildren.at(0)->evaluate(leaves);
         for (size_t i = 1; i < mChildren.size() && !isNeeded(result); ++i) {
           result = mChildren.at(i)->evaluate(leaves) || result;
         }
         return result;
       }
-      case Operator::AND:
-      {
+      case Operator::AND: {
         result = mChildren.at(0)->evaluate(leaves);
         for (size_t i = 1; i < mChildren.size() && isNeeded(result); ++i) {
           result = mChildren.at(i)->evaluate(leaves) && result;
@@ -189,4 +173,4 @@ namespace orc {
     return sstream.str();
   }
 
-} // namespace orc
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.hh b/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.hh
index bb3d16e924..3e0b331a2d 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.hh
+++ b/contrib/libs/apache/orc/c++/src/sargs/ExpressionTree.hh
@@ -40,7 +40,7 @@ namespace orc {
    * the SearchArgument into an internal form.
    */
   class ExpressionTree {
-  public:
+   public:
     enum class Operator { OR, AND, NOT, LEAF, CONSTANT };
 
     ExpressionTree(Operator op);
@@ -73,13 +73,13 @@ namespace orc {
 
     TruthValue evaluate(const std::vector<TruthValue>& leaves) const;
 
-  private:
+   private:
     Operator mOperator;
     std::vector<TreeNode> mChildren;
     size_t mLeaf;
     TruthValue mConstant;
   };
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_EXPRESSIONTREE_HH
+#endif  // ORC_EXPRESSIONTREE_HH
diff --git a/contrib/libs/apache/orc/c++/src/sargs/Literal.cc b/contrib/libs/apache/orc/c++/src/sargs/Literal.cc
index da4cdd0d47..c0cdd62201 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/Literal.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/Literal.cc
@@ -78,7 +78,7 @@ namespace orc {
     mHashCode = hashCode();
   }
 
-  Literal::Literal(const char * str, size_t size) {
+  Literal::Literal(const char* str, size_t size) {
     mType = PredicateDataType::STRING;
     mValue.Buffer = new char[size];
     memcpy(mValue.Buffer, str, size);
@@ -110,10 +110,8 @@ namespace orc {
     mHashCode = hashCode();
   }
 
-  Literal::Literal(const Literal& r): mType(r.mType)
-                                    , mSize(r.mSize)
-                                    , mIsNull(r.mIsNull)
-                                    , mHashCode(r.mHashCode) {
+  Literal::Literal(const Literal& r)
+      : mType(r.mType), mSize(r.mSize), mIsNull(r.mIsNull), mHashCode(r.mHashCode) {
     if (mType == PredicateDataType::STRING) {
       mValue.Buffer = new char[r.mSize];
       memcpy(mValue.Buffer, r.mValue.Buffer, r.mSize);
@@ -134,7 +132,7 @@ namespace orc {
 
   Literal::~Literal() {
     if (mType == PredicateDataType::STRING && mValue.Buffer) {
-      delete [] mValue.Buffer;
+      delete[] mValue.Buffer;
       mValue.Buffer = nullptr;
     }
   }
@@ -142,7 +140,7 @@ namespace orc {
   Literal& Literal::operator=(const Literal& r) {
     if (this != &r) {
       if (mType == PredicateDataType::STRING && mValue.Buffer) {
-        delete [] mValue.Buffer;
+        delete[] mValue.Buffer;
         mValue.Buffer = nullptr;
       }
 
@@ -178,8 +176,7 @@ namespace orc {
         sstream << mValue.DateVal;
         break;
       case PredicateDataType::TIMESTAMP:
-        sstream << mValue.TimeStampVal.second << "."
-                << mValue.TimeStampVal.nanos;
+        sstream << mValue.TimeStampVal.second << "." << mValue.TimeStampVal.nanos;
         break;
       case PredicateDataType::FLOAT:
         sstream << mValue.DoubleVal;
@@ -209,14 +206,13 @@ namespace orc {
         return std::hash<int64_t>{}(mValue.DateVal);
       case PredicateDataType::TIMESTAMP:
         return std::hash<int64_t>{}(mValue.TimeStampVal.second) * 17 +
-          std::hash<int32_t>{}(mValue.TimeStampVal.nanos);
+               std::hash<int32_t>{}(mValue.TimeStampVal.nanos);
       case PredicateDataType::FLOAT:
         return std::hash<double>{}(mValue.DoubleVal);
       case PredicateDataType::BOOLEAN:
         return std::hash<bool>{}(mValue.BooleanVal);
       case PredicateDataType::STRING:
-        return std::hash<std::string>{}(
-          std::string(mValue.Buffer, mSize));
+        return std::hash<std::string>{}(std::string(mValue.Buffer, mSize));
       case PredicateDataType::DECIMAL:
         // current glibc does not support hash<int128_t>
         return std::hash<int64_t>{}(mValue.IntVal);
@@ -246,12 +242,11 @@ namespace orc {
         return mValue.TimeStampVal == r.mValue.TimeStampVal;
       case PredicateDataType::FLOAT:
         return std::fabs(mValue.DoubleVal - r.mValue.DoubleVal) <
-          std::numeric_limits<double>::epsilon();
+               std::numeric_limits<double>::epsilon();
       case PredicateDataType::BOOLEAN:
         return mValue.BooleanVal == r.mValue.BooleanVal;
       case PredicateDataType::STRING:
-        return mSize == r.mSize && memcmp(
-          mValue.Buffer, r.mValue.Buffer, mSize) == 0;
+        return mSize == r.mSize && memcmp(mValue.Buffer, r.mValue.Buffer, mSize) == 0;
       case PredicateDataType::DECIMAL:
         return mValue.DecimalVal == r.mValue.DecimalVal;
       default:
@@ -263,8 +258,7 @@ namespace orc {
     return !(*this == r);
   }
 
-  inline void validate(const bool& isNull,
-                       const PredicateDataType& type,
+  inline void validate(const bool& isNull, const PredicateDataType& type,
                        const PredicateDataType& expected) {
     if (isNull) {
       throw std::logic_error("cannot get value when it is null!");
@@ -309,4 +303,4 @@ namespace orc {
     return Decimal(mValue.DecimalVal, mScale);
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.cc b/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.cc
index 3b012cece4..5fceedd854 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.cc
@@ -16,10 +16,10 @@
  * limitations under the License.
  */
 
+#include "PredicateLeaf.hh"
 #include "orc/BloomFilter.hh"
 #include "orc/Common.hh"
 #include "orc/Type.hh"
-#include "PredicateLeaf.hh"
 
 #include <algorithm>
 #include <functional>
@@ -28,81 +28,62 @@
 
 namespace orc {
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               const std::string& colName,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName,
                                Literal literal)
-                              : mOperator(op)
-                              , mType(type)
-                              , mColumnName(colName)
-                              , mHasColumnName(true)
-                              , mColumnId(0) {
+      : mOperator(op), mType(type), mColumnName(colName), mHasColumnName(true), mColumnId(0) {
     mLiterals.emplace_back(literal);
     mHashCode = hashCode();
     validate();
   }
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               uint64_t columnId,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId,
                                Literal literal)
-                              : mOperator(op)
-                              , mType(type)
-                              , mHasColumnName(false)
-                              , mColumnId(columnId) {
+      : mOperator(op), mType(type), mHasColumnName(false), mColumnId(columnId) {
     mLiterals.emplace_back(literal);
     mHashCode = hashCode();
     validate();
   }
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               const std::string& colName,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName,
                                const std::initializer_list<Literal>& literals)
-                              : mOperator(op)
-                              , mType(type)
-                              , mColumnName(colName)
-                              , mHasColumnName(true)
-                              , mLiterals(literals.begin(), literals.end()) {
+      : mOperator(op),
+        mType(type),
+        mColumnName(colName),
+        mHasColumnName(true),
+        mLiterals(literals.begin(), literals.end()) {
     mHashCode = hashCode();
     validate();
   }
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               uint64_t columnId,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId,
                                const std::initializer_list<Literal>& literals)
-                              : mOperator(op)
-                              , mType(type)
-                              , mHasColumnName(false)
-                              , mColumnId(columnId)
-                              , mLiterals(literals.begin(), literals.end()) {
+      : mOperator(op),
+        mType(type),
+        mHasColumnName(false),
+        mColumnId(columnId),
+        mLiterals(literals.begin(), literals.end()) {
     mHashCode = hashCode();
     validate();
   }
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               const std::string& colName,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName,
                                const std::vector<Literal>& literals)
-                              : mOperator(op)
-                              , mType(type)
-                              , mColumnName(colName)
-                              , mHasColumnName(true)
-                              , mLiterals(literals.begin(), literals.end()) {
+      : mOperator(op),
+        mType(type),
+        mColumnName(colName),
+        mHasColumnName(true),
+        mLiterals(literals.begin(), literals.end()) {
     mHashCode = hashCode();
     validate();
   }
 
-  PredicateLeaf::PredicateLeaf(Operator op,
-                               PredicateDataType type,
-                               uint64_t columnId,
+  PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId,
                                const std::vector<Literal>& literals)
-                              : mOperator(op)
-                              , mType(type)
-                              , mHasColumnName(false)
-                              , mColumnId(columnId)
-                              , mLiterals(literals.begin(), literals.end()) {
+      : mOperator(op),
+        mType(type),
+        mHasColumnName(false),
+        mColumnId(columnId),
+        mLiterals(literals.begin(), literals.end()) {
     mHashCode = hashCode();
     validate();
   }
@@ -131,8 +112,7 @@ namespace orc {
         if (mLiterals.size() != 1) {
           throw std::invalid_argument("One literal is required!");
         }
-        if (static_cast<int>(mLiterals.at(0).getType()) !=
-            static_cast<int>(mType)) {
+        if (static_cast<int>(mLiterals.at(0).getType()) != static_cast<int>(mType)) {
           throw std::invalid_argument("leaf and literal types do not match!");
         }
         break;
@@ -232,8 +212,7 @@ namespace orc {
         sstream << columnDebugString() << " = " << getLiteralString(mLiterals);
         break;
       case Operator::NULL_SAFE_EQUALS:
-        sstream << columnDebugString() << " null_safe_= "
-                << getLiteralString(mLiterals);
+        sstream << columnDebugString() << " null_safe_= " << getLiteralString(mLiterals);
         break;
       case Operator::LESS_THAN:
         sstream << columnDebugString() << " < " << getLiteralString(mLiterals);
@@ -248,9 +227,8 @@ namespace orc {
         sstream << columnDebugString() << " between " << getLiteralsString(mLiterals);
         break;
       default:
-        sstream << "unknown operator, column: "
-                << columnDebugString() << ", literals: "
-                << getLiteralsString(mLiterals);
+        sstream << "unknown operator, column: " << columnDebugString()
+                << ", literals: " << getLiteralsString(mLiterals);
     }
     sstream << ')';
     return sstream.str();
@@ -259,16 +237,11 @@ namespace orc {
   size_t PredicateLeaf::hashCode() const {
     size_t value = 0;
     std::for_each(mLiterals.cbegin(), mLiterals.cend(),
-      [&](const Literal& literal) {
-      value = value * 17 + literal.getHashCode();
-    });
-    auto colHash = mHasColumnName ?
-        std::hash<std::string>{}(mColumnName) :
-        std::hash<uint64_t>{}(mColumnId);
-    return value * 103 * 101 * 3 * 17 +
-      std::hash<int>{}(static_cast<int>(mOperator)) +
-      std::hash<int>{}(static_cast<int>(mType)) * 17 +
-      colHash * 3 * 17;
+                  [&](const Literal& literal) { value = value * 17 + literal.getHashCode(); });
+    auto colHash =
+        mHasColumnName ? std::hash<std::string>{}(mColumnName) : std::hash<uint64_t>{}(mColumnId);
+    return value * 103 * 101 * 3 * 17 + std::hash<int>{}(static_cast<int>(mOperator)) +
+           std::hash<int>{}(static_cast<int>(mType)) * 17 + colHash * 3 * 17;
   }
 
   bool PredicateLeaf::operator==(const PredicateLeaf& r) const {
@@ -289,9 +262,7 @@ namespace orc {
   }
 
   // enum to mark the position of predicate in the range
-  enum class Location {
-    BEFORE, MIN, MIDDLE, MAX, AFTER
-  };
+  enum class Location { BEFORE, MIN, MIDDLE, MAX, AFTER };
 
   DIAGNOSTIC_PUSH
   DIAGNOSTIC_IGNORE("-Wfloat-equal")
@@ -331,11 +302,8 @@ namespace orc {
    * @return the TruthValue result of the test
    */
   template <typename T>
-  TruthValue evaluatePredicateRange(const PredicateLeaf::Operator op,
-                                    const std::vector<T>& values,
-                                    const T& minValue,
-                                    const T& maxValue,
-                                    bool hasNull) {
+  TruthValue evaluatePredicateRange(const PredicateLeaf::Operator op, const std::vector<T>& values,
+                                    const T& minValue, const T& maxValue, bool hasNull) {
     Location loc;
     switch (op) {
       case PredicateLeaf::Operator::NULL_SAFE_EQUALS:
@@ -387,8 +355,7 @@ namespace orc {
           // are all of the values outside of the range?
           for (auto& value : values) {
             loc = compareToRange(value, minValue, maxValue);
-            if (loc == Location::MIN || loc == Location::MIDDLE ||
-                loc == Location::MAX) {
+            if (loc == Location::MIN || loc == Location::MIDDLE || loc == Location::MAX) {
               return hasNull ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
             }
           }
@@ -423,19 +390,17 @@ namespace orc {
 
   DIAGNOSTIC_POP
 
-  static TruthValue evaluateBoolPredicate(
-                                        const PredicateLeaf::Operator op,
-                                        const std::vector<Literal>& literals,
-                                        const proto::ColumnStatistics& stats) {
-    bool hasNull = stats.hasnull();
-    if (!stats.has_bucketstatistics() ||
-        stats.bucketstatistics().count_size() == 0) {
+  static TruthValue evaluateBoolPredicate(const PredicateLeaf::Operator op,
+                                          const std::vector<Literal>& literals,
+                                          const proto::ColumnStatistics& stats) {
+    bool hasNull = stats.has_null();
+    if (!stats.has_bucket_statistics() || stats.bucket_statistics().count_size() == 0) {
       // does not have bool stats
       return hasNull ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
     }
 
-    auto trueCount = stats.bucketstatistics().count(0);
-    auto falseCount = stats.numberofvalues() - trueCount;
+    auto trueCount = stats.bucket_statistics().count(0);
+    auto falseCount = stats.number_of_values() - trueCount;
     switch (op) {
       case PredicateLeaf::Operator::IS_NULL:
         return hasNull ? TruthValue::YES_NO : TruthValue::NO;
@@ -500,8 +465,7 @@ namespace orc {
     return result;
   }
 
-  static std::vector<Literal::Timestamp> literal2Timestamp(
-                                           const std::vector<Literal>& values) {
+  static std::vector<Literal::Timestamp> literal2Timestamp(const std::vector<Literal>& values) {
     std::vector<Literal::Timestamp> result;
     std::for_each(values.cbegin(), values.cend(), [&](const Literal& val) {
       if (!val.isNull()) {
@@ -511,8 +475,7 @@ namespace orc {
     return result;
   }
 
-  static std::vector<Decimal> literal2Decimal(
-                                           const std::vector<Literal>& values) {
+  static std::vector<Decimal> literal2Decimal(const std::vector<Literal>& values) {
     std::vector<Decimal> result;
     std::for_each(values.cbegin(), values.cend(), [&](const Literal& val) {
       if (!val.isNull()) {
@@ -522,8 +485,7 @@ namespace orc {
     return result;
   }
 
-  static std::vector<double> literal2Double(
-                                           const std::vector<Literal>& values) {
+  static std::vector<double> literal2Double(const std::vector<Literal>& values) {
     std::vector<double> result;
     std::for_each(values.cbegin(), values.cend(), [&](const Literal& val) {
       if (!val.isNull()) {
@@ -533,8 +495,7 @@ namespace orc {
     return result;
   }
 
-  static std::vector<TString> literal2String(
-                                           const std::vector<Literal>& values) {
+  static std::vector<TString> literal2String(const std::vector<Literal>& values) {
     std::vector<TString> result;
     std::for_each(values.cbegin(), values.cend(), [&](const Literal& val) {
       if (!val.isNull()) {
@@ -544,114 +505,84 @@ namespace orc {
     return result;
   }
 
-  TruthValue PredicateLeaf::evaluatePredicateMinMax(
-                                const proto::ColumnStatistics& colStats) const {
+  TruthValue PredicateLeaf::evaluatePredicateMinMax(const proto::ColumnStatistics& colStats) const {
     TruthValue result = TruthValue::YES_NO_NULL;
     switch (mType) {
       case PredicateDataType::LONG: {
-        if (colStats.has_intstatistics() &&
-            colStats.intstatistics().has_minimum() &&
-            colStats.intstatistics().has_maximum()) {
-          const auto& stats = colStats.intstatistics();
-          result = evaluatePredicateRange(
-            mOperator,
-            literal2Long(mLiterals),
-            stats.minimum(),
-            stats.maximum(),
-            colStats.hasnull());
+        if (colStats.has_int_statistics() && colStats.int_statistics().has_minimum() &&
+            colStats.int_statistics().has_maximum()) {
+          const auto& stats = colStats.int_statistics();
+          result = evaluatePredicateRange(mOperator, literal2Long(mLiterals), stats.minimum(),
+                                          stats.maximum(), colStats.has_null());
         }
         break;
       }
       case PredicateDataType::FLOAT: {
-        if (colStats.has_doublestatistics() &&
-            colStats.doublestatistics().has_minimum() &&
-            colStats.doublestatistics().has_maximum()) {
-          const auto& stats = colStats.doublestatistics();
+        if (colStats.has_double_statistics() && colStats.double_statistics().has_minimum() &&
+            colStats.double_statistics().has_maximum()) {
+          const auto& stats = colStats.double_statistics();
           if (!std::isfinite(stats.sum())) {
-              result = colStats.hasnull() ?
-                      TruthValue::YES_NO_NULL : TruthValue::YES_NO;
+            result = colStats.has_null() ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
           } else {
-              result = evaluatePredicateRange(
-                      mOperator,
-                      literal2Double(mLiterals),
-                      stats.minimum(),
-                      stats.maximum(),
-                      colStats.hasnull());
+            result = evaluatePredicateRange(mOperator, literal2Double(mLiterals), stats.minimum(),
+                                            stats.maximum(), colStats.has_null());
           }
         }
         break;
       }
       case PredicateDataType::STRING: {
-        ///TODO: check lowerBound and upperBound as well
-        if (colStats.has_stringstatistics() &&
-            colStats.stringstatistics().has_minimum() &&
-            colStats.stringstatistics().has_maximum()) {
-          const auto& stats = colStats.stringstatistics();
-          result = evaluatePredicateRange(
-            mOperator,
-            literal2String(mLiterals),
-            stats.minimum(),
-            stats.maximum(),
-            colStats.hasnull());
+        /// TODO: check lowerBound and upperBound as well
+        if (colStats.has_string_statistics() && colStats.string_statistics().has_minimum() &&
+            colStats.string_statistics().has_maximum()) {
+          const auto& stats = colStats.string_statistics();
+          result = evaluatePredicateRange(mOperator, literal2String(mLiterals), stats.minimum(),
+                                          stats.maximum(), colStats.has_null());
         }
         break;
       }
       case PredicateDataType::DATE: {
-        if (colStats.has_datestatistics() &&
-            colStats.datestatistics().has_minimum() &&
-            colStats.datestatistics().has_maximum()) {
-          const auto& stats = colStats.datestatistics();
-          result = evaluatePredicateRange(
-            mOperator,
-            literal2Date(mLiterals),
-            stats.minimum(),
-            stats.maximum(),
-            colStats.hasnull());
+        if (colStats.has_date_statistics() && colStats.date_statistics().has_minimum() &&
+            colStats.date_statistics().has_maximum()) {
+          const auto& stats = colStats.date_statistics();
+          result = evaluatePredicateRange(mOperator, literal2Date(mLiterals), stats.minimum(),
+                                          stats.maximum(), colStats.has_null());
         }
         break;
       }
       case PredicateDataType::TIMESTAMP: {
-        if (colStats.has_timestampstatistics() &&
-            colStats.timestampstatistics().has_minimumutc() &&
-            colStats.timestampstatistics().has_maximumutc()) {
-          const auto& stats = colStats.timestampstatistics();
+        if (colStats.has_timestamp_statistics() &&
+            colStats.timestamp_statistics().has_minimum_utc() &&
+            colStats.timestamp_statistics().has_maximum_utc()) {
+          const auto& stats = colStats.timestamp_statistics();
           constexpr int32_t DEFAULT_MIN_NANOS = 0;
           constexpr int32_t DEFAULT_MAX_NANOS = 999999;
-          int32_t minNano = stats.has_minimumnanos() ?
-            stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS;
-          int32_t maxNano = stats.has_maximumnanos() ?
-            stats.maximumnanos() - 1 : DEFAULT_MAX_NANOS;
+          int32_t minNano =
+              stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS;
+          int32_t maxNano =
+              stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS;
           Literal::Timestamp minTimestamp(
-            stats.minimumutc() / 1000,
-            static_cast<int32_t>((stats.minimumutc() % 1000) * 1000000) + minNano);
+              stats.minimum_utc() / 1000,
+              static_cast<int32_t>((stats.minimum_utc() % 1000) * 1000000) + minNano);
           Literal::Timestamp maxTimestamp(
-            stats.maximumutc() / 1000,
-            static_cast<int32_t>((stats.maximumutc() % 1000) * 1000000) + maxNano);
-          result = evaluatePredicateRange(
-            mOperator,
-            literal2Timestamp(mLiterals),
-            minTimestamp,
-            maxTimestamp,
-            colStats.hasnull());
+              stats.maximum_utc() / 1000,
+              static_cast<int32_t>((stats.maximum_utc() % 1000) * 1000000) + maxNano);
+          result = evaluatePredicateRange(mOperator, literal2Timestamp(mLiterals), minTimestamp,
+                                          maxTimestamp, colStats.has_null());
         }
         break;
       }
       case PredicateDataType::DECIMAL: {
-        if (colStats.has_decimalstatistics() &&
-            colStats.decimalstatistics().has_minimum() &&
-            colStats.decimalstatistics().has_maximum()) {
-          const auto& stats = colStats.decimalstatistics();
-          result = evaluatePredicateRange(
-            mOperator,
-            literal2Decimal(mLiterals),
-            Decimal(stats.minimum()),
-            Decimal(stats.maximum()),
-            colStats.hasnull());
+        if (colStats.has_decimal_statistics() && colStats.decimal_statistics().has_minimum() &&
+            colStats.decimal_statistics().has_maximum()) {
+          const auto& stats = colStats.decimal_statistics();
+          result = evaluatePredicateRange(mOperator, literal2Decimal(mLiterals),
+                                          Decimal(stats.minimum()), Decimal(stats.maximum()),
+                                          colStats.has_null());
         }
         break;
       }
-      case PredicateDataType::BOOLEAN:  {
-        if (colStats.has_bucketstatistics()) {
+      case PredicateDataType::BOOLEAN: {
+        if (colStats.has_bucket_statistics()) {
           result = evaluateBoolPredicate(mOperator, mLiterals, colStats);
         }
         break;
@@ -661,7 +592,7 @@ namespace orc {
     }
 
     // make sure null literal is respected for IN operator
-    if (mOperator == Operator::IN && colStats.hasnull()) {
+    if (mOperator == Operator::IN && colStats.has_null()) {
       for (const auto& literal : mLiterals) {
         if (literal.isNull()) {
           result = TruthValue::YES_NO_NULL;
@@ -673,29 +604,24 @@ namespace orc {
     return result;
   }
 
-  static bool shouldEvaluateBloomFilter(PredicateLeaf::Operator op,
-                                        TruthValue result,
-                                        const BloomFilter * bloomFilter) {
+  static bool shouldEvaluateBloomFilter(PredicateLeaf::Operator op, TruthValue result,
+                                        const BloomFilter* bloomFilter) {
     // evaluate bloom filter only when
     // 1) Bloom filter is available
     // 2) Min/Max evaluation yield YES or MAYBE
     // 3) Predicate is EQUALS or IN list
     // 4) Decimal type stores its string representation
     //    but has inconsistency in trailing zeros
-    if (bloomFilter != nullptr
-        && result != TruthValue::NO_NULL && result != TruthValue::NO
-        && (op == PredicateLeaf::Operator::EQUALS
-            || op == PredicateLeaf::Operator::NULL_SAFE_EQUALS
-            || op == PredicateLeaf::Operator::IN)) {
+    if (bloomFilter != nullptr && result != TruthValue::NO_NULL && result != TruthValue::NO &&
+        (op == PredicateLeaf::Operator::EQUALS || op == PredicateLeaf::Operator::NULL_SAFE_EQUALS ||
+         op == PredicateLeaf::Operator::IN)) {
       return true;
     }
     return false;
   }
 
-  static TruthValue checkInBloomFilter(PredicateLeaf::Operator,
-                                       PredicateDataType type,
-                                       const Literal& literal,
-                                       const BloomFilter * bf,
+  static TruthValue checkInBloomFilter(PredicateLeaf::Operator, PredicateDataType type,
+                                       const Literal& literal, const BloomFilter* bf,
                                        bool hasNull) {
     TruthValue result = hasNull ? TruthValue::NO_NULL : TruthValue::NO;
     if (literal.isNull()) {
@@ -715,7 +641,7 @@ namespace orc {
       }
     } else if (type == PredicateDataType::DECIMAL) {
       std::string decimal = literal.getDecimal().toString(true);
-      if (bf->testBytes(decimal.c_str(),  static_cast<int64_t>(decimal.size()))) {
+      if (bf->testBytes(decimal.c_str(), static_cast<int64_t>(decimal.size()))) {
         result = TruthValue::YES_NO_NULL;
       }
     } else if (type == PredicateDataType::TIMESTAMP) {
@@ -737,25 +663,20 @@ namespace orc {
     return result;
   }
 
-  TruthValue PredicateLeaf::evaluatePredicateBloomFiter(const BloomFilter * bf,
-                                                        bool hasNull) const {
+  TruthValue PredicateLeaf::evaluatePredicateBloomFiter(const BloomFilter* bf, bool hasNull) const {
     switch (mOperator) {
       case Operator::NULL_SAFE_EQUALS:
         // null safe equals does not return *_NULL variant.
         // So set hasNull to false
-        return checkInBloomFilter(
-          mOperator, mType, mLiterals.front(), bf, false);
+        return checkInBloomFilter(mOperator, mType, mLiterals.front(), bf, false);
       case Operator::EQUALS:
-        return checkInBloomFilter(
-          mOperator, mType, mLiterals.front(), bf, hasNull);
+        return checkInBloomFilter(mOperator, mType, mLiterals.front(), bf, hasNull);
       case Operator::IN:
-        for (const auto &literal : mLiterals) {
+        for (const auto& literal : mLiterals) {
           // if at least one value in IN list exist in bloom filter,
           // qualify the row group/stripe
-          TruthValue result = checkInBloomFilter(
-            mOperator, mType, literal, bf, hasNull);
-          if (result == TruthValue::YES_NO_NULL ||
-              result == TruthValue::YES_NO) {
+          TruthValue result = checkInBloomFilter(mOperator, mType, literal, bf, hasNull);
+          if (result == TruthValue::YES_NO_NULL || result == TruthValue::YES_NO) {
             return result;
           }
         }
@@ -771,7 +692,7 @@ namespace orc {
 
   TruthValue PredicateLeaf::evaluate(const WriterVersion writerVersion,
                                      const proto::ColumnStatistics& colStats,
-                                     const BloomFilter * bloomFilter) const {
+                                     const BloomFilter* bloomFilter) const {
     // files written before ORC-135 stores timestamp wrt to local timezone
     // causing issues with PPD. disable PPD for timestamp for all old files
     if (mType == PredicateDataType::TIMESTAMP) {
@@ -780,14 +701,13 @@ namespace orc {
       }
     }
 
-    bool allNull = colStats.hasnull() && colStats.numberofvalues() == 0;
-    if (mOperator == Operator::IS_NULL || ((
-        mOperator == Operator::EQUALS ||
-        mOperator == Operator::NULL_SAFE_EQUALS) &&
-        mLiterals.at(0).isNull())) {
+    bool allNull = colStats.has_null() && colStats.number_of_values() == 0;
+    if (mOperator == Operator::IS_NULL ||
+        ((mOperator == Operator::EQUALS || mOperator == Operator::NULL_SAFE_EQUALS) &&
+         mLiterals.at(0).isNull())) {
       // IS_NULL operator does not need to check min/max stats and bloom filter
-      return allNull ? TruthValue::YES :
-             (colStats.hasnull() ? TruthValue::YES_NO : TruthValue::NO);
+      return allNull ? TruthValue::YES
+                     : (colStats.has_null() ? TruthValue::YES_NO : TruthValue::NO);
     } else if (allNull) {
       // if we don't have any value, everything must have been null
       return TruthValue::IS_NULL;
@@ -795,10 +715,10 @@ namespace orc {
 
     TruthValue result = evaluatePredicateMinMax(colStats);
     if (shouldEvaluateBloomFilter(mOperator, result, bloomFilter)) {
-      return evaluatePredicateBloomFiter(bloomFilter, colStats.hasnull());
+      return evaluatePredicateBloomFiter(bloomFilter, colStats.has_null());
     } else {
       return result;
     }
   }
 
-} // namespace orc
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.hh b/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.hh
index 99791cf976..21ed456155 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.hh
+++ b/contrib/libs/apache/orc/c++/src/sargs/PredicateLeaf.hh
@@ -19,18 +19,17 @@
 #ifndef ORC_PREDICATELEAF_HH
 #define ORC_PREDICATELEAF_HH
 
-#include "wrap/orc-proto-wrapper.hh"
 #include "orc/Common.hh"
 #include "orc/sargs/Literal.hh"
 #include "orc/sargs/TruthValue.hh"
+#include "wrap/orc-proto-wrapper.hh"
 
 #include <string>
 #include <vector>
 
 namespace orc {
 
-  static constexpr uint64_t INVALID_COLUMN_ID =
-    std::numeric_limits<uint64_t>::max();
+  static constexpr uint64_t INVALID_COLUMN_ID = std::numeric_limits<uint64_t>::max();
 
   class BloomFilter;
 
@@ -38,7 +37,7 @@ namespace orc {
    * The primitive predicates that form a SearchArgument.
    */
   class PredicateLeaf {
-  public:
+   public:
     /**
      * The possible operators for predicates. To get the opposites, construct
      * an expression with a not operator.
@@ -55,9 +54,9 @@ namespace orc {
 
     // The possible types for sargs.
     enum class Type {
-      LONG = 0,     // all of the integer types
-      FLOAT,        // float and double
-      STRING,       // string, char, varchar
+      LONG = 0,  // all of the integer types
+      FLOAT,     // float and double
+      STRING,    // string, char, varchar
       DATE,
       DECIMAL,
       TIMESTAMP,
@@ -66,34 +65,20 @@ namespace orc {
 
     PredicateLeaf() = default;
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  const std::string& colName,
-                  Literal literal);
+    PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName, Literal literal);
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  uint64_t columnId,
-                  Literal literal);
+    PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId, Literal literal);
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  const std::string& colName,
+    PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName,
                   const std::initializer_list<Literal>& literalList);
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  uint64_t columnId,
+    PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId,
                   const std::initializer_list<Literal>& literalList);
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  const std::string& colName,
+    PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName,
                   const std::vector<Literal>& literalList);
 
-    PredicateLeaf(Operator op,
-                  PredicateDataType type,
-                  uint64_t columnId,
+    PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId,
                   const std::vector<Literal>& literalList);
 
     /**
@@ -134,17 +119,18 @@ namespace orc {
     /**
      * Evaluate current PredicateLeaf based on ColumnStatistics and BloomFilter
      */
-    TruthValue evaluate(const WriterVersion writerVersion,
-                        const proto::ColumnStatistics& colStats,
-                        const BloomFilter * bloomFilter) const;
+    TruthValue evaluate(const WriterVersion writerVersion, const proto::ColumnStatistics& colStats,
+                        const BloomFilter* bloomFilter) const;
 
     std::string toString() const;
 
     bool operator==(const PredicateLeaf& r) const;
 
-    size_t getHashCode() const { return mHashCode; }
+    size_t getHashCode() const {
+      return mHashCode;
+    }
 
-  private:
+   private:
     size_t hashCode() const;
 
     void validate() const;
@@ -152,13 +138,11 @@ namespace orc {
 
     std::string columnDebugString() const;
 
-    TruthValue evaluatePredicateMinMax(
-                                 const proto::ColumnStatistics& colStats) const;
+    TruthValue evaluatePredicateMinMax(const proto::ColumnStatistics& colStats) const;
 
-    TruthValue evaluatePredicateBloomFiter(const BloomFilter * bloomFilter,
-                                           bool hasNull) const;
+    TruthValue evaluatePredicateBloomFiter(const BloomFilter* bloomFilter, bool hasNull) const;
 
-  private:
+   private:
     Operator mOperator;
     PredicateDataType mType;
     std::string mColumnName;
@@ -180,6 +164,6 @@ namespace orc {
     }
   };
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_PREDICATELEAF_HH
+#endif  // ORC_PREDICATELEAF_HH
diff --git a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
index 42a554f5ca..7032a88126 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
@@ -22,8 +22,7 @@
 namespace orc {
 
   // find column id from column name
-  uint64_t SargsApplier::findColumn(const Type& type,
-                                    const std::string& colName) {
+  uint64_t SargsApplier::findColumn(const Type& type, const std::string& colName) {
     for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) {
       // Only STRUCT type has field names
       if (type.getKind() == STRUCT && type.getFieldName(i) == colName) {
@@ -38,19 +37,18 @@ namespace orc {
     return INVALID_COLUMN_ID;
   }
 
-  SargsApplier::SargsApplier(const Type& type,
-                             const SearchArgument * searchArgument,
-                             uint64_t rowIndexStride,
-                             WriterVersion writerVersion)
-                             : mType(type)
-                             , mSearchArgument(searchArgument)
-                             , mRowIndexStride(rowIndexStride)
-                             , mWriterVersion(writerVersion)
-                             , mStats(0, 0)
-                             , mHasEvaluatedFileStats(false)
-                             , mFileStatsEvalResult(true) {
-    const SearchArgumentImpl * sargs =
-      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument);
+  SargsApplier::SargsApplier(const Type& type, const SearchArgument* searchArgument,
+                             uint64_t rowIndexStride, WriterVersion writerVersion,
+                             ReaderMetrics* metrics, const SchemaEvolution* schemaEvolution)
+      : mType(type),
+        mSearchArgument(searchArgument),
+        mSchemaEvolution(schemaEvolution),
+        mRowIndexStride(rowIndexStride),
+        mWriterVersion(writerVersion),
+        mHasEvaluatedFileStats(false),
+        mFileStatsEvalResult(true),
+        mMetrics(metrics) {
+    const SearchArgumentImpl* sargs = dynamic_cast<const SearchArgumentImpl*>(mSearchArgument);
 
     // find the mapping from predicate leaves to columns
     const std::vector<PredicateLeaf>& leaves = sargs->getLeaves();
@@ -64,13 +62,11 @@ namespace orc {
     }
   }
 
-  bool SargsApplier::pickRowGroups(
-               uint64_t rowsInStripe,
-               const std::unordered_map<uint64_t, proto::RowIndex>& rowIndexes,
-               const std::map<uint32_t, BloomFilterIndex>& bloomFilters) {
+  bool SargsApplier::pickRowGroups(uint64_t rowsInStripe,
+                                   const std::unordered_map<uint64_t, proto::RowIndex>& rowIndexes,
+                                   const std::map<uint32_t, BloomFilterIndex>& bloomFilters) {
     // init state of each row group
-    uint64_t groupsInStripe =
-      (rowsInStripe + mRowIndexStride - 1) / mRowIndexStride;
+    uint64_t groupsInStripe = (rowsInStripe + mRowIndexStride - 1) / mRowIndexStride;
     mNextSkippedRows.resize(groupsInStripe);
     mTotalRowsInStripe = rowsInStripe;
 
@@ -79,10 +75,8 @@ namespace orc {
       return true;
     }
 
-    const auto& leaves =
-      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument)->getLeaves();
-    std::vector<TruthValue> leafValues(
-      leaves.size(), TruthValue::YES_NO_NULL);
+    const auto& leaves = dynamic_cast<const SearchArgumentImpl*>(mSearchArgument)->getLeaves();
+    std::vector<TruthValue> leafValues(leaves.size(), TruthValue::YES_NO_NULL);
     mHasSelected = false;
     mHasSkipped = false;
     uint64_t nextSkippedRowGroup = groupsInStripe;
@@ -95,10 +89,13 @@ namespace orc {
         if (columnIdx == INVALID_COLUMN_ID || rowIndexIter == rowIndexes.cend()) {
           // this column does not exist in current file
           leafValues[pred] = TruthValue::YES_NO_NULL;
+        } else if (mSchemaEvolution && !mSchemaEvolution->isSafePPDConversion(columnIdx)) {
+          // cannot evaluate predicate when ppd is not safe
+          leafValues[pred] = TruthValue::YES_NO_NULL;
         } else {
           // get column statistics
           const proto::ColumnStatistics& statistics =
-            rowIndexIter->second.entry(static_cast<int>(rowGroup)).statistics();
+              rowIndexIter->second.entry(static_cast<int>(rowGroup)).statistics();
 
           // get bloom filter
           std::shared_ptr<BloomFilter> bloomFilter;
@@ -107,9 +104,7 @@ namespace orc {
             bloomFilter = iter->second.entries.at(rowGroup);
           }
 
-          leafValues[pred] = leaves[pred].evaluate(mWriterVersion,
-                                                   statistics,
-                                                   bloomFilter.get());
+          leafValues[pred] = leaves[pred].evaluate(mWriterVersion, statistics, bloomFilter.get());
         }
       }
 
@@ -118,69 +113,76 @@ namespace orc {
         mNextSkippedRows[rowGroup] = 0;
         nextSkippedRowGroup = rowGroup;
       } else {
-        mNextSkippedRows[rowGroup] = (nextSkippedRowGroup == groupsInStripe) ?
-                                     rowsInStripe : (nextSkippedRowGroup * mRowIndexStride);
+        mNextSkippedRows[rowGroup] = (nextSkippedRowGroup == groupsInStripe)
+                                         ? rowsInStripe
+                                         : (nextSkippedRowGroup * mRowIndexStride);
       }
       mHasSelected |= needed;
       mHasSkipped |= !needed;
     } while (rowGroup != 0);
 
     // update stats
-    mStats.first = std::accumulate(
-      mNextSkippedRows.cbegin(), mNextSkippedRows.cend(), mStats.first,
-      [](bool rg, uint64_t s) { return rg ? 1 : 0 + s; });
-    mStats.second += groupsInStripe;
+    uint64_t selectedRGs = std::accumulate(
+        mNextSkippedRows.cbegin(), mNextSkippedRows.cend(), 0UL,
+        [](uint64_t initVal, uint64_t rg) { return rg > 0 ? initVal + 1 : initVal; });
+    if (mMetrics != nullptr) {
+      mMetrics->SelectedRowGroupCount.fetch_add(selectedRGs);
+      mMetrics->EvaluatedRowGroupCount.fetch_add(groupsInStripe);
+    }
 
     return mHasSelected;
   }
 
-  bool SargsApplier::evaluateColumnStatistics(
-                                    const PbColumnStatistics& colStats) const {
-    const SearchArgumentImpl * sargs =
-      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument);
+  bool SargsApplier::evaluateColumnStatistics(const PbColumnStatistics& colStats) const {
+    const SearchArgumentImpl* sargs = dynamic_cast<const SearchArgumentImpl*>(mSearchArgument);
     if (sargs == nullptr) {
       throw InvalidArgument("Failed to cast to SearchArgumentImpl");
     }
 
     const std::vector<PredicateLeaf>& leaves = sargs->getLeaves();
-    std::vector<TruthValue> leafValues(
-      leaves.size(), TruthValue::YES_NO_NULL);
+    std::vector<TruthValue> leafValues(leaves.size(), TruthValue::YES_NO_NULL);
 
     for (size_t pred = 0; pred != leaves.size(); ++pred) {
       uint64_t columnId = mFilterColumns[pred];
-      if (columnId != INVALID_COLUMN_ID &&
-          colStats.size() > static_cast<int>(columnId)) {
-        leafValues[pred] = leaves[pred].evaluate(
-          mWriterVersion, colStats.Get(static_cast<int>(columnId)), nullptr);
+      if (columnId != INVALID_COLUMN_ID && colStats.size() > static_cast<int>(columnId)) {
+        leafValues[pred] = leaves[pred].evaluate(mWriterVersion,
+                                                 colStats.Get(static_cast<int>(columnId)), nullptr);
       }
     }
 
     return isNeeded(mSearchArgument->evaluate(leafValues));
   }
 
-  bool SargsApplier::evaluateStripeStatistics(
-                            const proto::StripeStatistics& stripeStats) {
-    if (stripeStats.colstats_size() == 0) {
+  bool SargsApplier::evaluateStripeStatistics(const proto::StripeStatistics& stripeStats,
+                                              uint64_t stripeRowGroupCount) {
+    if (stripeStats.col_stats_size() == 0) {
       return true;
     }
 
-    bool ret = evaluateColumnStatistics(stripeStats.colstats());
+    bool ret = evaluateColumnStatistics(stripeStats.col_stats());
     if (!ret) {
       // reset mNextSkippedRows when the current stripe does not satisfy the PPD
       mNextSkippedRows.clear();
+      if (mMetrics != nullptr) {
+        mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount);
+      }
     }
     return ret;
   }
 
-  bool SargsApplier::evaluateFileStatistics(const proto::Footer& footer) {
+  bool SargsApplier::evaluateFileStatistics(const proto::Footer& footer,
+                                            uint64_t numRowGroupsInStripeRange) {
     if (!mHasEvaluatedFileStats) {
       if (footer.statistics_size() == 0) {
         mFileStatsEvalResult = true;
       } else {
         mFileStatsEvalResult = evaluateColumnStatistics(footer.statistics());
+        if (!mFileStatsEvalResult && mMetrics != nullptr) {
+          mMetrics->EvaluatedRowGroupCount.fetch_add(numRowGroupsInStripeRange);
+        }
       }
       mHasEvaluatedFileStats = true;
     }
     return mFileStatsEvalResult;
   }
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.hh b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.hh
index d8bdf852d0..73703dcf6b 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.hh
+++ b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.hh
@@ -19,62 +19,78 @@
 #ifndef ORC_SARGSAPPLIER_HH
 #define ORC_SARGSAPPLIER_HH
 
-#include "wrap/orc-proto-wrapper.hh"
 #include <orc/Common.hh>
 #include "orc/BloomFilter.hh"
+#include "orc/Reader.hh"
 #include "orc/Type.hh"
+#include "wrap/orc-proto-wrapper.hh"
 
 #include "sargs/SearchArgument.hh"
 
+#include "SchemaEvolution.hh"
+
 #include <unordered_map>
 
 namespace orc {
 
   class SargsApplier {
-  public:
-    SargsApplier(const Type& type,
-                 const SearchArgument * searchArgument,
-                 uint64_t rowIndexStride,
-                 WriterVersion writerVersion);
+   public:
+    SargsApplier(const Type& type, const SearchArgument* searchArgument, uint64_t rowIndexStride,
+                 WriterVersion writerVersion, ReaderMetrics* metrics,
+                 const SchemaEvolution* schemaEvolution = nullptr);
 
     /**
      * Evaluate search argument on file statistics
+     * If file statistics don't satisfy the sargs,
+     * the EvaluatedRowGroupCount of Reader Metrics will be updated.
+     * Otherwise, Reader Metrics will not be updated and
+     * will require further evaluation.
      * @return true if file statistics satisfy the sargs
      */
-    bool evaluateFileStatistics(const proto::Footer& footer);
+    bool evaluateFileStatistics(const proto::Footer& footer, uint64_t numRowGroupsInStripeRange);
 
     /**
      * Evaluate search argument on stripe statistics
+     * If stripe statistics don't satisfy the sargs,
+     * the EvaluatedRowGroupCount of Reader Metrics will be updated.
+     * Otherwise, Reader Metrics will not be updated and
+     * will require further evaluation.
      * @return true if stripe statistics satisfy the sargs
      */
-    bool evaluateStripeStatistics(const proto::StripeStatistics& stripeStats);
+    bool evaluateStripeStatistics(const proto::StripeStatistics& stripeStats,
+                                  uint64_t stripeRowGroupCount);
 
     /**
      * TODO: use proto::RowIndex and proto::BloomFilter to do the evaluation
      * Pick the row groups that we need to load from the current stripe.
      * @return true if any row group is selected
      */
-    bool pickRowGroups(
-                      uint64_t rowsInStripe,
-                      const std::unordered_map<uint64_t, proto::RowIndex>& rowIndexes,
-                      const std::map<uint32_t, BloomFilterIndex>& bloomFilters);
+    bool pickRowGroups(uint64_t rowsInStripe,
+                       const std::unordered_map<uint64_t, proto::RowIndex>& rowIndexes,
+                       const std::map<uint32_t, BloomFilterIndex>& bloomFilters);
 
     /**
      * Return a vector of the next skipped row for each RowGroup. Each value is the row id
      * in stripe. 0 means the current RowGroup is entirely skipped.
      * Only valid after invoking pickRowGroups().
      */
-    const std::vector<uint64_t>& getNextSkippedRows() const { return mNextSkippedRows; }
+    const std::vector<uint64_t>& getNextSkippedRows() const {
+      return mNextSkippedRows;
+    }
 
     /**
      * Indicate whether any row group is selected in the last evaluation
      */
-    bool hasSelected() const { return mHasSelected; }
+    bool hasSelected() const {
+      return mHasSelected;
+    }
 
     /**
      * Indicate whether any row group is skipped in the last evaluation
      */
-    bool hasSkipped() const { return mHasSkipped; }
+    bool hasSkipped() const {
+      return mHasSkipped;
+    }
 
     /**
      * Whether any row group from current row in the stripe matches PPD.
@@ -90,13 +106,17 @@ namespace orc {
     }
 
     std::pair<uint64_t, uint64_t> getStats() const {
-      return mStats;
+      if (mMetrics != nullptr) {
+        return std::make_pair(mMetrics->SelectedRowGroupCount.load(),
+                              mMetrics->EvaluatedRowGroupCount.load());
+      } else {
+        return {0, 0};
+      }
     }
 
-  private:
+   private:
     // evaluate column statistics in the form of protobuf::RepeatedPtrField
-    typedef ::google::protobuf::RepeatedPtrField<proto::ColumnStatistics>
-      PbColumnStatistics;
+    typedef ::google::protobuf::RepeatedPtrField<proto::ColumnStatistics> PbColumnStatistics;
     bool evaluateColumnStatistics(const PbColumnStatistics& colStats) const;
 
     friend class TestSargsApplier_findColumnTest_Test;
@@ -104,9 +124,10 @@ namespace orc {
     friend class TestSargsApplier_findMapColumnTest_Test;
     static uint64_t findColumn(const Type& type, const std::string& colName);
 
-  private:
+   private:
     const Type& mType;
-    const SearchArgument * mSearchArgument;
+    const SearchArgument* mSearchArgument;
+    const SchemaEvolution* mSchemaEvolution;
     uint64_t mRowIndexStride;
     WriterVersion mWriterVersion;
     // column ids for each predicate leaf in the search argument
@@ -119,13 +140,14 @@ namespace orc {
     uint64_t mTotalRowsInStripe;
     bool mHasSelected;
     bool mHasSkipped;
-    // keep stats of selected RGs and evaluated RGs
-    std::pair<uint64_t, uint64_t> mStats;
     // store result of file stats evaluation
     bool mHasEvaluatedFileStats;
     bool mFileStatsEvalResult;
+    // use the SelectedRowGroupCount and EvaluatedRowGroupCount to
+    // keep stats of selected RGs and evaluated RGs
+    ReaderMetrics* mMetrics;
   };
 
-}
+}  // namespace orc
 
-#endif //ORC_SARGSAPPLIER_HH
+#endif  // ORC_SARGSAPPLIER_HH
diff --git a/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.cc b/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.cc
index f6abb316b5..806727f0a0 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.cc
@@ -33,14 +33,12 @@ namespace orc {
     return mLeaves;
   }
 
-  const ExpressionTree * SearchArgumentImpl::getExpression() const {
+  const ExpressionTree* SearchArgumentImpl::getExpression() const {
     return mExpressionTree.get();
   }
 
-  TruthValue SearchArgumentImpl::evaluate(
-                                  const std::vector<TruthValue>& leaves) const {
-    return mExpressionTree == nullptr ?
-      TruthValue::YES : mExpressionTree->evaluate(leaves);
+  TruthValue SearchArgumentImpl::evaluate(const std::vector<TruthValue>& leaves) const {
+    return mExpressionTree == nullptr ? TruthValue::YES : mExpressionTree->evaluate(leaves);
   }
 
   std::string SearchArgumentImpl::toString() const {
@@ -61,8 +59,7 @@ namespace orc {
     mCurrTree.push_back(mRoot);
   }
 
-  SearchArgumentBuilder&
-  SearchArgumentBuilderImpl::start(ExpressionTree::Operator op) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::start(ExpressionTree::Operator op) {
     TreeNode node = std::make_shared<ExpressionTree>(op);
     mCurrTree.front()->addChild(node);
     mCurrTree.push_front(node);
@@ -84,13 +81,13 @@ namespace orc {
   SearchArgumentBuilder& SearchArgumentBuilderImpl::end() {
     TreeNode& current = mCurrTree.front();
     if (current->getChildren().empty()) {
-      throw std::invalid_argument("Cannot create expression " +
-        mRoot->toString() + " with no children.");
+      throw std::invalid_argument("Cannot create expression " + mRoot->toString() +
+                                  " with no children.");
     }
     if (current->getOperator() == ExpressionTree::Operator::NOT &&
         current->getChildren().size() != 1) {
-      throw std::invalid_argument("Can't create NOT expression " +
-        current->toString() + " with more than 1 child.");
+      throw std::invalid_argument("Can't create NOT expression " + current->toString() +
+                                  " with more than 1 child.");
     }
     mCurrTree.pop_front();
     return *this;
@@ -110,16 +107,14 @@ namespace orc {
     return columnId == INVALID_COLUMN_ID;
   }
 
-  template<typename T>
-  SearchArgumentBuilder&
-  SearchArgumentBuilderImpl::compareOperator(PredicateLeaf::Operator op,
-                                             T column,
-                                             PredicateDataType type,
-                                             Literal literal) {
+  template <typename T>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::compareOperator(PredicateLeaf::Operator op,
+                                                                    T column,
+                                                                    PredicateDataType type,
+                                                                    Literal literal) {
     TreeNode parent = mCurrTree.front();
     if (isInvalidColumn(column)) {
-      parent->addChild(
-        std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
+      parent->addChild(std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
       PredicateLeaf leaf(op, type, column, literal);
       parent->addChild(std::make_shared<ExpressionTree>(addLeaf(leaf)));
@@ -130,29 +125,25 @@ namespace orc {
   SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThan(const std::string& column,
                                                              PredicateDataType type,
                                                              Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::LESS_THAN, column, type, literal);
+    return compareOperator(PredicateLeaf::Operator::LESS_THAN, column, type, literal);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThan(uint64_t columnId,
                                                              PredicateDataType type,
                                                              Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::LESS_THAN, columnId, type, literal);
+    return compareOperator(PredicateLeaf::Operator::LESS_THAN, columnId, type, literal);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThanEquals(const std::string& column,
                                                                    PredicateDataType type,
                                                                    Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::LESS_THAN_EQUALS, column, type, literal);
+    return compareOperator(PredicateLeaf::Operator::LESS_THAN_EQUALS, column, type, literal);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThanEquals(uint64_t columnId,
                                                                    PredicateDataType type,
                                                                    Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::LESS_THAN_EQUALS, columnId, type, literal);
+    return compareOperator(PredicateLeaf::Operator::LESS_THAN_EQUALS, columnId, type, literal);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::equals(const std::string& column,
@@ -161,8 +152,7 @@ namespace orc {
     if (literal.isNull()) {
       return isNull(column, type);
     } else {
-      return compareOperator(
-        PredicateLeaf::Operator::EQUALS, column, type, literal);
+      return compareOperator(PredicateLeaf::Operator::EQUALS, column, type, literal);
     }
   }
 
@@ -172,54 +162,46 @@ namespace orc {
     if (literal.isNull()) {
       return isNull(columnId, type);
     } else {
-      return compareOperator(
-        PredicateLeaf::Operator::EQUALS, columnId, type, literal);
+      return compareOperator(PredicateLeaf::Operator::EQUALS, columnId, type, literal);
     }
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::nullSafeEquals(const std::string& column,
                                                                    PredicateDataType type,
                                                                    Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::NULL_SAFE_EQUALS, column, type, literal);
+    return compareOperator(PredicateLeaf::Operator::NULL_SAFE_EQUALS, column, type, literal);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::nullSafeEquals(uint64_t columnId,
                                                                    PredicateDataType type,
                                                                    Literal literal) {
-    return compareOperator(
-      PredicateLeaf::Operator::NULL_SAFE_EQUALS, columnId, type, literal);
+    return compareOperator(PredicateLeaf::Operator::NULL_SAFE_EQUALS, columnId, type, literal);
   }
 
-  template<typename T, typename CONTAINER>
-  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIn(T column,
-                                                PredicateDataType type,
-                                                const CONTAINER& literals) {
-    TreeNode &parent = mCurrTree.front();
+  template <typename T, typename CONTAINER>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIn(T column, PredicateDataType type,
+                                                                  const CONTAINER& literals) {
+    TreeNode& parent = mCurrTree.front();
     if (isInvalidColumn(column)) {
-      parent->addChild(
-        std::make_shared<ExpressionTree>((TruthValue::YES_NO_NULL)));
+      parent->addChild(std::make_shared<ExpressionTree>((TruthValue::YES_NO_NULL)));
     } else {
       if (literals.size() == 0) {
-        throw std::invalid_argument(
-          "Can't create in expression with no arguments");
+        throw std::invalid_argument("Can't create in expression with no arguments");
       }
-      PredicateLeaf leaf(
-        PredicateLeaf::Operator::IN, type, column, literals);
+      PredicateLeaf leaf(PredicateLeaf::Operator::IN, type, column, literals);
       parent->addChild(std::make_shared<ExpressionTree>(addLeaf(leaf)));
     }
     return *this;
   }
 
-  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(const std::string& column,
-                                                       PredicateDataType type,
-                                                       const std::initializer_list<Literal>& literals) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(
+      const std::string& column, PredicateDataType type,
+      const std::initializer_list<Literal>& literals) {
     return addChildForIn(column, type, literals);
   }
 
-  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(uint64_t columnId,
-                                                       PredicateDataType type,
-                                                       const std::initializer_list<Literal>& literals) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(
+      uint64_t columnId, PredicateDataType type, const std::initializer_list<Literal>& literals) {
     return addChildForIn(columnId, type, literals);
   }
 
@@ -229,23 +211,19 @@ namespace orc {
     return addChildForIn(column, type, literals);
   }
 
-  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(uint64_t columnId,
-                                                       PredicateDataType type,
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(uint64_t columnId, PredicateDataType type,
                                                        const std::vector<Literal>& literals) {
     return addChildForIn(columnId, type, literals);
   }
 
-  template<typename T>
-  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIsNull(T column, PredicateDataType type) {
+  template <typename T>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIsNull(T column,
+                                                                      PredicateDataType type) {
     TreeNode& parent = mCurrTree.front();
     if (isInvalidColumn(column)) {
-      parent->addChild(
-        std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
+      parent->addChild(std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
-      PredicateLeaf leaf(PredicateLeaf::Operator::IS_NULL,
-                         type,
-                         column,
-                         {});
+      PredicateLeaf leaf(PredicateLeaf::Operator::IS_NULL, type, column, {});
       parent->addChild(std::make_shared<ExpressionTree>(addLeaf(leaf)));
     }
     return *this;
@@ -261,34 +239,29 @@ namespace orc {
     return addChildForIsNull(columnId, type);
   }
 
-  template<typename T>
+  template <typename T>
   SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForBetween(T column,
                                                                        PredicateDataType type,
-                                                                       Literal lower, Literal upper) {
+                                                                       Literal lower,
+                                                                       Literal upper) {
     TreeNode& parent = mCurrTree.front();
     if (isInvalidColumn(column)) {
-      parent->addChild(
-        std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
+      parent->addChild(std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
-      PredicateLeaf leaf(PredicateLeaf::Operator::BETWEEN,
-                         type,
-                         column,
-                         { lower, upper });
+      PredicateLeaf leaf(PredicateLeaf::Operator::BETWEEN, type, column, {lower, upper});
       parent->addChild(std::make_shared<ExpressionTree>(addLeaf(leaf)));
     }
     return *this;
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::between(const std::string& column,
-                                                            PredicateDataType type,
-                                                            Literal lower,
+                                                            PredicateDataType type, Literal lower,
                                                             Literal upper) {
     return addChildForBetween(column, type, lower, upper);
   }
 
   SearchArgumentBuilder& SearchArgumentBuilderImpl::between(uint64_t columnId,
-                                                            PredicateDataType type,
-                                                            Literal lower,
+                                                            PredicateDataType type, Literal lower,
                                                             Literal upper) {
     return addChildForBetween(columnId, type, lower, upper);
   }
@@ -307,9 +280,7 @@ namespace orc {
    * @param leafReorder buffer for leaf reorder
    * @return the next available leaf id
    */
-  static size_t compactLeaves(const TreeNode& tree,
-                              size_t next,
-                              size_t leafReorder[]) {
+  static size_t compactLeaves(const TreeNode& tree, size_t next, size_t leafReorder[]) {
     if (tree->getOperator() == ExpressionTree::Operator::LEAF) {
       size_t oldLeaf = tree->getLeaf();
       if (leafReorder[oldLeaf] == UNUSED_LEAF) {
@@ -378,18 +349,16 @@ namespace orc {
         case ExpressionTree::Operator::AND: {
           TreeNode result(new ExpressionTree(ExpressionTree::Operator::OR));
           for (auto& kid : child->getChildren()) {
-            result->addChild(pushDownNot(std::make_shared<ExpressionTree>(
-                ExpressionTree::Operator::NOT, NodeList{ kid })
-            ));
+            result->addChild(pushDownNot(
+                std::make_shared<ExpressionTree>(ExpressionTree::Operator::NOT, NodeList{kid})));
           }
           return result;
         }
         case ExpressionTree::Operator::OR: {
           TreeNode result(new ExpressionTree(ExpressionTree::Operator::AND));
           for (auto& kid : child->getChildren()) {
-            result->addChild(pushDownNot(std::make_shared<ExpressionTree>(
-                ExpressionTree::Operator::NOT, NodeList{ kid })
-            ));
+            result->addChild(pushDownNot(
+                std::make_shared<ExpressionTree>(ExpressionTree::Operator::NOT, NodeList{kid})));
           }
           return result;
         }
@@ -432,8 +401,7 @@ namespace orc {
             case ExpressionTree::Operator::LEAF:
             case ExpressionTree::Operator::CONSTANT:
             default:
-              throw std::invalid_argument(
-                "Got a maybe as child of " + expr->toString());
+              throw std::invalid_argument("Got a maybe as child of " + expr->toString());
           }
         } else {
           expr->getChildren()[i] = child;
@@ -444,8 +412,9 @@ namespace orc {
       if (!children.empty()) {
         // eliminate removed maybe nodes from expr
         std::vector<TreeNode> nodes;
-        std::for_each(children.begin(), children.end(),
-          [&](const TreeNode& node){ if (node) nodes.emplace_back(node); });
+        std::for_each(children.begin(), children.end(), [&](const TreeNode& node) {
+          if (node) nodes.emplace_back(node);
+        });
         std::swap(children, nodes);
         if (children.empty()) {
           return std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL);
@@ -462,7 +431,7 @@ namespace orc {
    * @return the flattened expression, which will always be root with
    *   potentially modified children.
    */
-   TreeNode SearchArgumentBuilderImpl::flatten(TreeNode root) {
+  TreeNode SearchArgumentBuilderImpl::flatten(TreeNode root) {
     if (root) {
       std::vector<TreeNode> nodes;
       for (size_t i = 0; i != root->getChildren().size(); ++i) {
@@ -524,10 +493,8 @@ namespace orc {
       }
     }
     if (andList.size() > 1) {
-      generateAllCombinations(
-        result,
-        std::vector<TreeNode>(andList.cbegin() + 1, andList.cend()),
-        nonAndList);
+      generateAllCombinations(result, std::vector<TreeNode>(andList.cbegin() + 1, andList.cend()),
+                              nonAndList);
     }
   }
 
@@ -576,8 +543,7 @@ namespace orc {
         }
         if (!andList.empty()) {
           if (checkCombinationsThreshold(andList)) {
-            root = std::make_shared<ExpressionTree>(
-              ExpressionTree::Operator::AND);
+            root = std::make_shared<ExpressionTree>(ExpressionTree::Operator::AND);
             generateAllCombinations(root->getChildren(), andList, nonAndList);
           } else {
             root = std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL);
@@ -588,17 +554,15 @@ namespace orc {
     return root;
   }
 
-  SearchArgumentImpl::SearchArgumentImpl(TreeNode root,
-                                         const std::vector<PredicateLeaf>& leaves)
-                                        : mExpressionTree(root)
-                                        , mLeaves(leaves) {
+  SearchArgumentImpl::SearchArgumentImpl(TreeNode root, const std::vector<PredicateLeaf>& leaves)
+      : mExpressionTree(root), mLeaves(leaves) {
     // PASS
   }
 
   std::unique_ptr<SearchArgument> SearchArgumentBuilderImpl::build() {
     if (mCurrTree.size() != 1) {
-      throw std::invalid_argument("Failed to end " +
-        std::to_string(mCurrTree.size()) + " operations.");
+      throw std::invalid_argument("Failed to end " + std::to_string(mCurrTree.size()) +
+                                  " operations.");
     }
     mRoot = pushDownNot(mRoot);
     mRoot = foldMaybe(mRoot);
@@ -612,18 +576,17 @@ namespace orc {
     std::vector<PredicateLeaf> leafList(newLeafCount, PredicateLeaf());
 
     // build the new list
-    for (auto & leaf : mLeaves) {
+    for (auto& leaf : mLeaves) {
       size_t newLoc = leafReorder[leaf.second];
       if (newLoc != UNUSED_LEAF) {
         leafList[newLoc] = leaf.first;
       }
     }
-    return std::unique_ptr<SearchArgument>(
-      new SearchArgumentImpl(mRoot, leafList));
+    return std::make_unique<SearchArgumentImpl>(mRoot, leafList);
   }
 
   std::unique_ptr<SearchArgumentBuilder> SearchArgumentFactory::newBuilder() {
-    return std::unique_ptr<SearchArgumentBuilder>(new SearchArgumentBuilderImpl());
+    return std::make_unique<SearchArgumentBuilderImpl>();
   }
 
-} // namespace orc
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.hh b/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.hh
index 57d765e1df..4b74b28743 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.hh
+++ b/contrib/libs/apache/orc/c++/src/sargs/SearchArgument.hh
@@ -19,10 +19,10 @@
 #ifndef ORC_SRC_SEARCHARGUMENT_HH
 #define ORC_SRC_SEARCHARGUMENT_HH
 
-#include "wrap/orc-proto-wrapper.hh"
 #include "ExpressionTree.hh"
 #include "orc/sargs/SearchArgument.hh"
 #include "sargs/PredicateLeaf.hh"
+#include "wrap/orc-proto-wrapper.hh"
 
 #include <deque>
 #include <stdexcept>
@@ -40,7 +40,7 @@ namespace orc {
    * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
    */
   class SearchArgumentImpl : public SearchArgument {
-  public:
+   public:
     SearchArgumentImpl(TreeNode root, const std::vector<PredicateLeaf>& leaves);
 
     /**
@@ -54,7 +54,7 @@ namespace orc {
      * Get the expression tree. This should only needed for file formats that
      * need to translate the expression to an internal form.
      */
-    const ExpressionTree * getExpression() const;
+    const ExpressionTree* getExpression() const;
 
     /**
      * Evaluate the entire predicate based on the values for the leaf predicates.
@@ -65,7 +65,7 @@ namespace orc {
 
     std::string toString() const override;
 
-  private:
+   private:
     std::shared_ptr<ExpressionTree> mExpressionTree;
     std::vector<PredicateLeaf> mLeaves;
   };
@@ -75,7 +75,7 @@ namespace orc {
    * must call startOr, startAnd, or startNot before adding any leaves.
    */
   class SearchArgumentBuilderImpl : public SearchArgumentBuilder {
-  public:
+   public:
     SearchArgumentBuilderImpl();
 
     /**
@@ -110,8 +110,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& lessThan(const std::string& column,
-                                    PredicateDataType type,
+    SearchArgumentBuilder& lessThan(const std::string& column, PredicateDataType type,
                                     Literal literal) override;
 
     /**
@@ -121,8 +120,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& lessThan(uint64_t columnId,
-                                    PredicateDataType type,
+    SearchArgumentBuilder& lessThan(uint64_t columnId, PredicateDataType type,
                                     Literal literal) override;
 
     /**
@@ -132,8 +130,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& lessThanEquals(const std::string& column,
-                                          PredicateDataType type,
+    SearchArgumentBuilder& lessThanEquals(const std::string& column, PredicateDataType type,
                                           Literal literal) override;
 
     /**
@@ -143,8 +140,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& lessThanEquals(uint64_t columnId,
-                                          PredicateDataType type,
+    SearchArgumentBuilder& lessThanEquals(uint64_t columnId, PredicateDataType type,
                                           Literal literal) override;
 
     /**
@@ -154,8 +150,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& equals(const std::string& column,
-                                  PredicateDataType type,
+    SearchArgumentBuilder& equals(const std::string& column, PredicateDataType type,
                                   Literal literal) override;
 
     /**
@@ -165,8 +160,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& equals(uint64_t columnId,
-                                  PredicateDataType type,
+    SearchArgumentBuilder& equals(uint64_t columnId, PredicateDataType type,
                                   Literal literal) override;
 
     /**
@@ -176,8 +170,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& nullSafeEquals(const std::string& column,
-                                          PredicateDataType type,
+    SearchArgumentBuilder& nullSafeEquals(const std::string& column, PredicateDataType type,
                                           Literal literal) override;
 
     /**
@@ -187,8 +180,7 @@ namespace orc {
      * @param literal the literal
      * @return this
      */
-    SearchArgumentBuilder& nullSafeEquals(uint64_t columnId,
-                                          PredicateDataType type,
+    SearchArgumentBuilder& nullSafeEquals(uint64_t columnId, PredicateDataType type,
                                           Literal literal) override;
 
     /**
@@ -198,8 +190,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    SearchArgumentBuilder& in(const std::string& column,
-                              PredicateDataType type,
+    SearchArgumentBuilder& in(const std::string& column, PredicateDataType type,
                               const std::initializer_list<Literal>& literals) override;
 
     /**
@@ -209,8 +200,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    SearchArgumentBuilder& in(uint64_t columnId,
-                              PredicateDataType type,
+    SearchArgumentBuilder& in(uint64_t columnId, PredicateDataType type,
                               const std::initializer_list<Literal>& literals) override;
 
     /**
@@ -220,8 +210,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    SearchArgumentBuilder& in(const std::string& column,
-                              PredicateDataType type,
+    SearchArgumentBuilder& in(const std::string& column, PredicateDataType type,
                               const std::vector<Literal>& literals) override;
 
     /**
@@ -231,8 +220,7 @@ namespace orc {
      * @param literals the literals
      * @return this
      */
-    SearchArgumentBuilder& in(uint64_t columnId,
-                              PredicateDataType type,
+    SearchArgumentBuilder& in(uint64_t columnId, PredicateDataType type,
                               const std::vector<Literal>& literals) override;
 
     /**
@@ -241,8 +229,7 @@ namespace orc {
      * @param type the type of the expression
      * @return this
      */
-    SearchArgumentBuilder& isNull(const std::string& column,
-                                  PredicateDataType type) override;
+    SearchArgumentBuilder& isNull(const std::string& column, PredicateDataType type) override;
 
     /**
      * Add an is null leaf to the current item on the stack.
@@ -250,8 +237,7 @@ namespace orc {
      * @param type the type of the expression
      * @return this
      */
-    SearchArgumentBuilder& isNull(uint64_t columnId,
-                                  PredicateDataType type) override;
+    SearchArgumentBuilder& isNull(uint64_t columnId, PredicateDataType type) override;
 
     /**
      * Add a between leaf to the current item on the stack.
@@ -261,9 +247,7 @@ namespace orc {
      * @param upper the literal
      * @return this
      */
-    SearchArgumentBuilder& between(const std::string& column,
-                                   PredicateDataType type,
-                                   Literal lower,
+    SearchArgumentBuilder& between(const std::string& column, PredicateDataType type, Literal lower,
                                    Literal upper) override;
 
     /**
@@ -274,9 +258,7 @@ namespace orc {
      * @param upper the literal
      * @return this
      */
-    SearchArgumentBuilder& between(uint64_t columnId,
-                                   PredicateDataType type,
-                                   Literal lower,
+    SearchArgumentBuilder& between(uint64_t columnId, PredicateDataType type, Literal lower,
                                    Literal upper) override;
 
     /**
@@ -293,49 +275,40 @@ namespace orc {
      */
     std::unique_ptr<SearchArgument> build() override;
 
-  private:
+   private:
     SearchArgumentBuilder& start(ExpressionTree::Operator op);
     size_t addLeaf(PredicateLeaf leaf);
 
     static bool isInvalidColumn(const std::string& column);
     static bool isInvalidColumn(uint64_t columnId);
 
-    template<typename T>
-    SearchArgumentBuilder& compareOperator(PredicateLeaf::Operator op,
-                                           T column,
-                                           PredicateDataType type,
-                                           Literal literal);
+    template <typename T>
+    SearchArgumentBuilder& compareOperator(PredicateLeaf::Operator op, T column,
+                                           PredicateDataType type, Literal literal);
 
-    template<typename T, typename CONTAINER>
-    SearchArgumentBuilder& addChildForIn(T column,
-                                         PredicateDataType type,
+    template <typename T, typename CONTAINER>
+    SearchArgumentBuilder& addChildForIn(T column, PredicateDataType type,
                                          const CONTAINER& literals);
 
-    template<typename T>
-    SearchArgumentBuilder& addChildForIsNull(T column,
-                                             PredicateDataType type);
+    template <typename T>
+    SearchArgumentBuilder& addChildForIsNull(T column, PredicateDataType type);
 
-    template<typename T>
-    SearchArgumentBuilder& addChildForBetween(T column,
-                                              PredicateDataType type,
-                                              Literal lower,
+    template <typename T>
+    SearchArgumentBuilder& addChildForBetween(T column, PredicateDataType type, Literal lower,
                                               Literal upper);
 
-  public:
+   public:
     static TreeNode pushDownNot(TreeNode root);
     static TreeNode foldMaybe(TreeNode expr);
     static TreeNode flatten(TreeNode root);
     static TreeNode convertToCNF(TreeNode root);
 
-  private:
+   private:
     std::deque<TreeNode> mCurrTree;
-    std::unordered_map<PredicateLeaf,
-                       size_t,
-                       PredicateLeafHash,
-                       PredicateLeafComparator> mLeaves;
+    std::unordered_map<PredicateLeaf, size_t, PredicateLeafHash, PredicateLeafComparator> mLeaves;
     std::shared_ptr<ExpressionTree> mRoot;
   };
 
-} // namespace orc
+}  // namespace orc
 
-#endif //ORC_SRC_SEARCHARGUMENT_HH
+#endif  // ORC_SRC_SEARCHARGUMENT_HH
diff --git a/contrib/libs/apache/orc/c++/src/sargs/TruthValue.cc b/contrib/libs/apache/orc/c++/src/sargs/TruthValue.cc
index fe00ed9472..4b3eda7e90 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/TruthValue.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/TruthValue.cc
@@ -122,4 +122,4 @@ namespace orc {
     }
   }
 
-}
+}  // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h
index 605fbf826c..1373c18924 100644
--- a/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h
+++ b/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h
@@ -1,15 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #ifndef CODED_STREAM_WRAPPER_HH
@@ -20,12 +25,12 @@
 DIAGNOSTIC_PUSH
 
 #ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wshorten-64-to-32")
-  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
+DIAGNOSTIC_IGNORE("-Wshorten-64-to-32")
+DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
 #endif
 
 #if defined(__GNUC__) || defined(__clang__)
-  DIAGNOSTIC_IGNORE("-Wconversion")
+DIAGNOSTIC_IGNORE("-Wconversion")
 #endif
 
 #include <google/protobuf/io/coded_stream.h>
diff --git a/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh b/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh
index 5c161660cc..014c7d6570 100644
--- a/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh
+++ b/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh
@@ -1,15 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #ifndef ORC_PROTO_WRAPPER_HH
@@ -20,27 +25,27 @@
 DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
-  DIAGNOSTIC_IGNORE("-Wconversion")
-  DIAGNOSTIC_IGNORE("-Wdeprecated")
-  DIAGNOSTIC_IGNORE("-Wsign-conversion")
-  DIAGNOSTIC_IGNORE("-Wunused-parameter")
+DIAGNOSTIC_IGNORE("-Wconversion")
+DIAGNOSTIC_IGNORE("-Wdeprecated")
+DIAGNOSTIC_IGNORE("-Wsign-conversion")
+DIAGNOSTIC_IGNORE("-Wunused-parameter")
 #endif
 
 #ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wnested-anon-types")
-  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
-  DIAGNOSTIC_IGNORE("-Wshorten-64-to-32")
-  DIAGNOSTIC_IGNORE("-Wunknown-warning-option")
-  DIAGNOSTIC_IGNORE("-Wweak-vtables")
-  DIAGNOSTIC_IGNORE("-Wzero-as-null-pointer-constant")
+DIAGNOSTIC_IGNORE("-Wnested-anon-types")
+DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
+DIAGNOSTIC_IGNORE("-Wshorten-64-to-32")
+DIAGNOSTIC_IGNORE("-Wunknown-warning-option")
+DIAGNOSTIC_IGNORE("-Wweak-vtables")
+DIAGNOSTIC_IGNORE("-Wzero-as-null-pointer-constant")
 #endif
 
 #if defined(_MSC_VER)
-  DIAGNOSTIC_IGNORE(4146) // unary minus operator applied to unsigned type, result still unsigned
-  DIAGNOSTIC_IGNORE(4800) // forcing value to bool 'true' or 'false'
+DIAGNOSTIC_IGNORE(4146)  // unary minus operator applied to unsigned type, result still unsigned
+DIAGNOSTIC_IGNORE(4800)  // forcing value to bool 'true' or 'false'
 #endif
 
-#include "contrib/libs/apache/orc/proto/orc_proto.pb.h"
+#include "orc_proto.pb.h"
 
 DIAGNOSTIC_POP
 
diff --git a/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h
index aeab0f0033..18166f7200 100644
--- a/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h
+++ b/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h
@@ -1,15 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #ifndef SNAPPY_WRAPPER_HH
@@ -20,7 +25,7 @@
 DIAGNOSTIC_PUSH
 
 #ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
+DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
 #endif
 
 #include <snappy.h>
diff --git a/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h
index 1af0bd002d..0a42daaf84 100644
--- a/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h
+++ b/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h
@@ -1,15 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #ifndef ZERO_COPY_STREAM_WRAPPER_HH
@@ -20,13 +25,13 @@
 DIAGNOSTIC_PUSH
 
 #if defined(__GNUC__) || defined(__clang__)
-  DIAGNOSTIC_IGNORE("-Wdeprecated")
-  DIAGNOSTIC_IGNORE("-Wpadded")
-  DIAGNOSTIC_IGNORE("-Wunused-parameter")
+DIAGNOSTIC_IGNORE("-Wdeprecated")
+DIAGNOSTIC_IGNORE("-Wpadded")
+DIAGNOSTIC_IGNORE("-Wunused-parameter")
 #endif
 
 #ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
+DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
 #endif
 
 #include <google/protobuf/io/zero_copy_stream.h>
diff --git a/contrib/libs/apache/orc/proto/orc_proto.proto b/contrib/libs/apache/orc/proto/orc_proto.proto
deleted file mode 100644
index ff05657a54..0000000000
--- a/contrib/libs/apache/orc/proto/orc_proto.proto
+++ /dev/null
@@ -1,451 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-syntax = "proto2";
-
-package orc.proto;
-
-option java_package = "org.apache.orc";
-
-message IntegerStatistics  {
-  optional sint64 minimum = 1;
-  optional sint64 maximum = 2;
-  optional sint64 sum = 3;
-}
-
-message DoubleStatistics {
-  optional double minimum = 1;
-  optional double maximum = 2;
-  optional double sum = 3;
-}
-
-message StringStatistics {
-  optional string minimum = 1;
-  optional string maximum = 2;
-  // sum will store the total length of all strings in a stripe
-  optional sint64 sum = 3;
-  // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
-  // bound instead of the minimum or maximum values above.
-  optional string lowerBound = 4;
-  optional string upperBound = 5;
-}
-
-message BucketStatistics {
-  repeated uint64 count = 1 [packed=true];
-}
-
-message DecimalStatistics {
-  optional string minimum = 1;
-  optional string maximum = 2;
-  optional string sum = 3;
-}
-
-message DateStatistics {
-  // min,max values saved as days since epoch
-  optional sint32 minimum = 1;
-  optional sint32 maximum = 2;
-}
-
-message TimestampStatistics {
-  // min,max values saved as milliseconds since epoch
-  optional sint64 minimum = 1;
-  optional sint64 maximum = 2;
-  optional sint64 minimumUtc = 3;
-  optional sint64 maximumUtc = 4;
-  // store the lower 6 TS digits for min/max to achieve nanosecond precision
-  optional int32 minimumNanos = 5;
-  optional int32 maximumNanos = 6;
-}
-
-message BinaryStatistics {
-  // sum will store the total binary blob length in a stripe
-  optional sint64 sum = 1;
-}
-
-// Statistics for list and map
-message CollectionStatistics {
-  optional uint64 minChildren = 1;
-  optional uint64 maxChildren = 2;
-  optional uint64 totalChildren = 3;
-}
-
-message ColumnStatistics {
-  optional uint64 numberOfValues = 1;
-  optional IntegerStatistics intStatistics = 2;
-  optional DoubleStatistics doubleStatistics = 3;
-  optional StringStatistics stringStatistics = 4;
-  optional BucketStatistics bucketStatistics = 5;
-  optional DecimalStatistics decimalStatistics = 6;
-  optional DateStatistics dateStatistics = 7;
-  optional BinaryStatistics binaryStatistics = 8;
-  optional TimestampStatistics timestampStatistics = 9;
-  optional bool hasNull = 10;
-  optional uint64 bytesOnDisk = 11;
-  optional CollectionStatistics collectionStatistics = 12;
-}
-
-message RowIndexEntry {
-  repeated uint64 positions = 1 [packed=true];
-  optional ColumnStatistics statistics = 2;
-}
-
-message RowIndex {
-  repeated RowIndexEntry entry = 1;
-}
-
-message BloomFilter {
-  optional uint32 numHashFunctions = 1;
-  repeated fixed64 bitset = 2;
-  optional bytes utf8bitset = 3;
-}
-
-message BloomFilterIndex {
-  repeated BloomFilter bloomFilter = 1;
-}
-
-message Stream {
-  // if you add new index stream kinds, you need to make sure to update
-  // StreamName to ensure it is added to the stripe in the right area
-  enum Kind {
-    PRESENT = 0;
-    DATA = 1;
-    LENGTH = 2;
-    DICTIONARY_DATA = 3;
-    DICTIONARY_COUNT = 4;
-    SECONDARY = 5;
-    ROW_INDEX = 6;
-    BLOOM_FILTER = 7;
-    BLOOM_FILTER_UTF8 = 8;
-    // Virtual stream kinds to allocate space for encrypted index and data.
-    ENCRYPTED_INDEX = 9;
-    ENCRYPTED_DATA = 10;
-
-    // stripe statistics streams
-    STRIPE_STATISTICS = 100;
-    // A virtual stream kind that is used for setting the encryption IV.
-    FILE_STATISTICS = 101;
-  }
-  optional Kind kind = 1;
-  optional uint32 column = 2;
-  optional uint64 length = 3;
-}
-
-message ColumnEncoding {
-  enum Kind {
-    DIRECT = 0;
-    DICTIONARY = 1;
-    DIRECT_V2 = 2;
-    DICTIONARY_V2 = 3;
-  }
-  optional Kind kind = 1;
-  optional uint32 dictionarySize = 2;
-
-  // The encoding of the bloom filters for this column:
-  //   0 or missing = none or original
-  //   1            = ORC-135 (utc for timestamps)
-  optional uint32 bloomEncoding = 3;
-}
-
-message StripeEncryptionVariant {
-  repeated Stream streams = 1;
-  repeated ColumnEncoding encoding = 2;
-}
-
-// each stripe looks like:
-//   index streams
-//     unencrypted
-//     variant 1..N
-//   data streams
-//     unencrypted
-//     variant 1..N
-//   footer
-
-message StripeFooter {
-  repeated Stream streams = 1;
-  repeated ColumnEncoding columns = 2;
-  optional string writerTimezone = 3;
-  // one for each column encryption variant
-  repeated StripeEncryptionVariant encryption = 4;
-}
-
-// the file tail looks like:
-//   encrypted stripe statistics: ColumnarStripeStatistics (order by variant)
-//   stripe statistics: Metadata
-//   footer: Footer
-//   postscript: PostScript
-//   psLen: byte
-
-message StringPair {
-  optional string key = 1;
-  optional string value = 2;
-}
-
-message Type {
-  enum Kind {
-    BOOLEAN = 0;
-    BYTE = 1;
-    SHORT = 2;
-    INT = 3;
-    LONG = 4;
-    FLOAT = 5;
-    DOUBLE = 6;
-    STRING = 7;
-    BINARY = 8;
-    TIMESTAMP = 9;
-    LIST = 10;
-    MAP = 11;
-    STRUCT = 12;
-    UNION = 13;
-    DECIMAL = 14;
-    DATE = 15;
-    VARCHAR = 16;
-    CHAR = 17;
-    TIMESTAMP_INSTANT = 18;
-  }
-  optional Kind kind = 1;
-  repeated uint32 subtypes = 2 [packed=true];
-  repeated string fieldNames = 3;
-  optional uint32 maximumLength = 4;
-  optional uint32 precision = 5;
-  optional uint32 scale = 6;
-  repeated StringPair attributes = 7;
-}
-
-message StripeInformation {
-  // the global file offset of the start of the stripe
-  optional uint64 offset = 1;
-  // the number of bytes of index
-  optional uint64 indexLength = 2;
-  // the number of bytes of data
-  optional uint64 dataLength = 3;
-  // the number of bytes in the stripe footer
-  optional uint64 footerLength = 4;
-  // the number of rows in this stripe
-  optional uint64 numberOfRows = 5;
-  // If this is present, the reader should use this value for the encryption
-  // stripe id for setting the encryption IV. Otherwise, the reader should
-  // use one larger than the previous stripe's encryptStripeId.
-  // For unmerged ORC files, the first stripe will use 1 and the rest of the
-  // stripes won't have it set. For merged files, the stripe information
-  // will be copied from their original files and thus the first stripe of
-  // each of the input files will reset it to 1.
-  // Note that 1 was choosen, because protobuf v3 doesn't serialize
-  // primitive types that are the default (eg. 0).
-  optional uint64 encryptStripeId = 6;
-  // For each encryption variant, the new encrypted local key to use
-  // until we find a replacement.
-  repeated bytes encryptedLocalKeys = 7;
-}
-
-message UserMetadataItem {
-  optional string name = 1;
-  optional bytes value = 2;
-}
-
-// StripeStatistics (1 per a stripe), which each contain the
-// ColumnStatistics for each column.
-// This message type is only used in ORC v0 and v1.
-message StripeStatistics {
-  repeated ColumnStatistics colStats = 1;
-}
-
-// This message type is only used in ORC v0 and v1.
-message Metadata {
-  repeated StripeStatistics stripeStats = 1;
-}
-
-// In ORC v2 (and for encrypted columns in v1), each column has
-// their column statistics written separately.
-message ColumnarStripeStatistics {
-  // one value for each stripe in the file
-  repeated ColumnStatistics colStats = 1;
-}
-
-enum EncryptionAlgorithm {
-  UNKNOWN_ENCRYPTION = 0;  // used for detecting future algorithms
-  AES_CTR_128 = 1;
-  AES_CTR_256 = 2;
-}
-
-message FileStatistics {
-  repeated ColumnStatistics column = 1;
-}
-
-// How was the data masked? This isn't necessary for reading the file, but
-// is documentation about how the file was written.
-message DataMask {
-  // the kind of masking, which may include third party masks
-  optional string name = 1;
-  // parameters for the mask
-  repeated string maskParameters = 2;
-  // the unencrypted column roots this mask was applied to
-  repeated uint32 columns = 3 [packed = true];
-}
-
-// Information about the encryption keys.
-message EncryptionKey {
-  optional string keyName = 1;
-  optional uint32 keyVersion = 2;
-  optional EncryptionAlgorithm algorithm = 3;
-}
-
-// The description of an encryption variant.
-// Each variant is a single subtype that is encrypted with a single key.
-message EncryptionVariant {
-  // the column id of the root
-  optional uint32 root = 1;
-  // The master key that was used to encrypt the local key, referenced as
-  // an index into the Encryption.key list.
-  optional uint32 key = 2;
-  // the encrypted key for the file footer
-  optional bytes encryptedKey = 3;
-  // the stripe statistics for this variant
-  repeated Stream stripeStatistics = 4;
-  // encrypted file statistics as a FileStatistics
-  optional bytes fileStatistics = 5;
-}
-
-// Which KeyProvider encrypted the local keys.
-enum KeyProviderKind {
-  UNKNOWN = 0;
-  HADOOP = 1;
-  AWS = 2;
-  GCP = 3;
-  AZURE = 4;
-}
-
-message Encryption {
-  // all of the masks used in this file
-  repeated DataMask mask = 1;
-  // all of the keys used in this file
-  repeated EncryptionKey key = 2;
-  // The encrypted variants.
-  // Readers should prefer the first variant that the user has access to
-  // the corresponding key. If they don't have access to any of the keys,
-  // they should get the unencrypted masked data.
-  repeated EncryptionVariant variants = 3;
-  // How are the local keys encrypted?
-  optional KeyProviderKind keyProvider = 4;
-}
-
-enum CalendarKind {
-  UNKNOWN_CALENDAR = 0;
-   // A hybrid Julian/Gregorian calendar with a cutover point in October 1582.
-  JULIAN_GREGORIAN = 1;
-  // A calendar that extends the Gregorian calendar back forever.
-  PROLEPTIC_GREGORIAN = 2;
-}
-
-message Footer {
-  optional uint64 headerLength = 1;
-  optional uint64 contentLength = 2;
-  repeated StripeInformation stripes = 3;
-  repeated Type types = 4;
-  repeated UserMetadataItem metadata = 5;
-  optional uint64 numberOfRows = 6;
-  repeated ColumnStatistics statistics = 7;
-  optional uint32 rowIndexStride = 8;
-
-  // Each implementation that writes ORC files should register for a code
-  // 0 = ORC Java
-  // 1 = ORC C++
-  // 2 = Presto
-  // 3 = Scritchley Go from https://github.com/scritchley/orc
-  // 4 = Trino
-  optional uint32 writer = 9;
-
-  // information about the encryption in this file
-  optional Encryption encryption = 10;
-  optional CalendarKind calendar = 11;
-
-  // informative description about the version of the software that wrote
-  // the file. It is assumed to be within a given writer, so for example
-  // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
-  optional string softwareVersion = 12;
-}
-
-enum CompressionKind {
-  NONE = 0;
-  ZLIB = 1;
-  SNAPPY = 2;
-  LZO = 3;
-  LZ4 = 4;
-  ZSTD = 5;
-}
-
-// Serialized length must be less that 255 bytes
-message PostScript {
-  optional uint64 footerLength = 1;
-  optional CompressionKind compression = 2;
-  optional uint64 compressionBlockSize = 3;
-  // the version of the file format
-  //   [0, 11] = Hive 0.11
-  //   [0, 12] = Hive 0.12
-  repeated uint32 version = 4 [packed = true];
-  optional uint64 metadataLength = 5;
-
-  // The version of the writer that wrote the file. This number is
-  // updated when we make fixes or large changes to the writer so that
-  // readers can detect whether a given bug is present in the data.
-  //
-  // Only the Java ORC writer may use values under 6 (or missing) so that
-  // readers that predate ORC-202 treat the new writers correctly. Each
-  // writer should assign their own sequence of versions starting from 6.
-  //
-  // Version of the ORC Java writer:
-  //   0 = original
-  //   1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
-  //                        string statistics use utf8 for min/max)
-  //   2 = HIVE-4243 fixed (use real column names from Hive tables)
-  //   3 = HIVE-12055 added (vectorized writer implementation)
-  //   4 = HIVE-13083 fixed (decimals write present stream correctly)
-  //   5 = ORC-101 fixed (bloom filters use utf8 consistently)
-  //   6 = ORC-135 fixed (timestamp statistics use utc)
-  //   7 = ORC-517 fixed (decimal64 min/max incorrect)
-  //   8 = ORC-203 added (trim very long string statistics)
-  //   9 = ORC-14 added (column encryption)
-  //
-  // Version of the ORC C++ writer:
-  //   6 = original
-  //
-  // Version of the Presto writer:
-  //   6 = original
-  //
-  // Version of the Scritchley Go writer:
-  //   6 = original
-  //
-  // Version of the Trino writer:
-  //   6 = original
-  //
-  optional uint32 writerVersion = 6;
-
-  // the number of bytes in the encrypted stripe statistics
-  optional uint64 stripeStatisticsLength = 7;
-
-  // Leave this last in the record
-  optional string magic = 8000;
-}
-
-// The contents of the file tail that must be serialized.
-// This gets serialized as part of OrcSplit, also used by footer cache.
-message FileTail {
-  optional PostScript postscript = 1;
-  optional Footer footer = 2;
-  optional uint64 fileLength = 3;
-  optional uint64 postscriptLength = 4;
-}
diff --git a/contrib/libs/apache/orc/ya.make b/contrib/libs/apache/orc/ya.make
index be3b4d5a01..ec4d745340 100644
--- a/contrib/libs/apache/orc/ya.make
+++ b/contrib/libs/apache/orc/ya.make
@@ -6,11 +6,12 @@ LICENSE(Apache-2.0)
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
-VERSION(1.8.0)
+VERSION(2.0.0)
 
-ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-1.8.0.tar.gz)
+ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-2.0.0.tar.gz)
 
 PEERDIR(
+    contrib/libs/apache/orc-format
     contrib/libs/lz4
     contrib/libs/snappy
     contrib/libs/zlib
@@ -20,7 +21,6 @@ PEERDIR(
 ADDINCL(
     GLOBAL contrib/libs/apache/orc/c++/include
     contrib/libs/apache/orc/c++/src
-    contrib/libs/apache/orc/proto
     contrib/libs/lz4
     contrib/libs/zstd/include
 )
@@ -29,15 +29,23 @@ NO_COMPILER_WARNINGS()
 
 NO_UTIL()
 
+CFLAGS(
+    -DENABLE_METRICS=0
+)
+
 SRCS(
     c++/src/Adaptor.cc
+    c++/src/BlockBuffer.cc
     c++/src/BloomFilter.cc
+    c++/src/BpackingDefault.cc
     c++/src/ByteRLE.cc
     c++/src/ColumnPrinter.cc
     c++/src/ColumnReader.cc
     c++/src/ColumnWriter.cc
     c++/src/Common.cc
     c++/src/Compression.cc
+    c++/src/ConvertColumnReader.cc
+    c++/src/CpuInfoUtil.cc
     c++/src/Exceptions.cc
     c++/src/Int128.cc
     c++/src/LzoDecompressor.cc
@@ -50,6 +58,7 @@ SRCS(
     c++/src/Reader.cc
     c++/src/RleDecoderV2.cc
     c++/src/RleEncoderV2.cc
+    c++/src/SchemaEvolution.cc
     c++/src/Statistics.cc
     c++/src/StripeStream.cc
     c++/src/Timezone.cc
@@ -64,7 +73,6 @@ SRCS(
     c++/src/sargs/SargsApplier.cc
     c++/src/sargs/SearchArgument.cc
     c++/src/sargs/TruthValue.cc
-    proto/orc_proto.proto
 )
 
 END()
author	thegeorg <thegeorg@yandex-team.com>	2024-03-17 04:47:32 +0300
committer	thegeorg <thegeorg@yandex-team.com>	2024-03-17 04:57:12 +0300
commit	0816a937aebb4bb8ff5d68730c625cb1c99c9b4b (patch)
tree	45dd2b2d18017590838384a1a7687279ac280444 /contrib/libs/apache/orc
parent	6d5eb3aff8e43031b7dcb8be42d649799cd8a6c3 (diff)
download	ydb-0816a937aebb4bb8ff5d68730c625cb1c99c9b4b.tar.gz