author | iaz1607 <iaz1607@yandex-team.ru> | 2022-02-10 16:45:37 +0300
---|---|---
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:37 +0300
commit | 94e51c602b555459333b3c6ae92476c424c930bc (patch) |
tree | b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/apache/orc/c++ |
parent | e5437feb4ac2d2dc044e1090b9312dde5ef197e0 (diff) |
download | ydb-94e51c602b555459333b3c6ae92476c424c930bc.tar.gz |
Restoring authorship annotation for <iaz1607@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/apache/orc/c++')
65 files changed, 23322 insertions, 23322 deletions
diff --git a/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh b/contrib/libs/apache/orc/c++/include/orc/BloomFilter.hh
index 42f0476f03..86c1288b62 100644
@@ -1,45 +1,45 @@
[Whole-file hunk: all 45 lines of BloomFilter.hh are removed and re-added with no visible content change; the commit only restores authorship annotations. The header carries the Apache 2.0 license and declares the abstract orc::BloomFilter interface (testBytes, testLong, testDouble) plus the BloomFilterIndex struct, a vector of shared_ptr<BloomFilter> entries.]
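As a quick orientation to the interface above, the sketch below probes a BloomFilterIndex for a string value. It is illustrative only: how the index is obtained (normally from an ORC reader's row-group metadata) is outside this header, and mightContain is a hypothetical helper.

```cpp
#include "orc/BloomFilter.hh"

#include <string>

// Hypothetical helper: 'index' would normally come from an ORC reader's
// row-group metadata, which is outside the scope of BloomFilter.hh.
bool mightContain(const orc::BloomFilterIndex& index, const std::string& value) {
  for (const auto& filter : index.entries) {
    // testBytes() returns true when the value *may* be present and false
    // only when it is definitely absent.
    if (filter->testBytes(value.data(), static_cast<int64_t>(value.size()))) {
      return true;
    }
  }
  return false;
}
```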
diff --git a/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh b/contrib/libs/apache/orc/c++/include/orc/ColumnPrinter.hh
index 349cabe025..aa19214738 100644
@@ -1,51 +1,51 @@
[Whole-file hunk: all 51 lines removed and re-added with no visible content change. The header declares the ColumnPrinter base class (a std::string& buffer, hasNulls/notNull state, printRow(uint64_t), and reset(const ColumnVectorBatch&)) and the createColumnPrinter(std::string&, const Type*) factory.]
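A minimal sketch of the call sequence the declarations above imply: create a printer bound to a string buffer, reset it for each batch, then print rows. The batch and root type are assumed to come from an orc::RowReader (declared in Reader.hh), and numElements is a field of ColumnVectorBatch from Vector.hh.

```cpp
#include "orc/ColumnPrinter.hh"

#include <iostream>
#include <string>

// 'batch' and 'rootType' are assumed inputs obtained elsewhere (e.g. from a
// RowReader); this only shows the ColumnPrinter protocol itself.
void printBatch(const orc::ColumnVectorBatch& batch, const orc::Type& rootType) {
  std::string line;
  ORC_UNIQUE_PTR<orc::ColumnPrinter> printer =
      orc::createColumnPrinter(line, &rootType);
  printer->reset(batch);                      // bind the printer to this batch
  for (uint64_t row = 0; row < batch.numElements; ++row) {
    line.clear();
    printer->printRow(row);                   // appends a rendering of the row to 'line'
    std::cout << line << "\n";
  }
}
```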
diff --git a/contrib/libs/apache/orc/c++/include/orc/Common.hh b/contrib/libs/apache/orc/c++/include/orc/Common.hh
index 34dc0a118f..4aa4a85118 100644
@@ -1,286 +1,286 @@
[Whole-file hunk: the 286 lines are removed and re-added with no visible content change; a few recently added enumerators (SCRITCHLEY_GO, TRINO_WRITER, WriterVersion_ORC_517/203/14, writerIdToString) appear as unchanged context. The header declares the FileVersion class; the WriterId, CompressionKind, WriterVersion, StreamKind, ColumnEncodingKind and BloomFilterVersion enums with their *ToString helpers; the StreamInformation and StripeInformation interfaces; and a compare<T> template with a Decimal specialization that compares integral and fractional parts separately.]
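An illustrative sketch of the helpers declared above. The Decimal type itself comes from Vector.hh, which Common.hh includes; the literal values are arbitrary.

```cpp
#include "orc/Common.hh"

#include <iostream>

int main() {
  orc::FileVersion version(0, 12);
  std::cout << version.toString() << "\n";
  std::cout << (version == orc::FileVersion::v_0_12() ? "0.12" : "older") << "\n";

  std::cout << orc::compressionKindToString(orc::CompressionKind_ZSTD) << "\n";
  std::cout << orc::writerIdToString(orc::ORC_CPP_WRITER) << "\n";

  // The compare<Decimal> specialization orders by numeric value, not by scale:
  // 1.5 (scale 1) is less than 2.25 (scale 2).
  orc::Decimal a("1.5"), b("2.25");
  std::cout << std::boolalpha << orc::compare(a, b) << "\n";  // true
  return 0;
}
```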
diff --git a/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh b/contrib/libs/apache/orc/c++/include/orc/Exceptions.hh
index e991f9eecd..9765d4fd6b 100644
@@ -1,60 +1,60 @@
[Whole-file hunk: all 60 lines removed and re-added with no visible content change. The header declares the three ORC exception types: NotImplementedYet (a std::logic_error) plus ParseError and InvalidArgument (both std::runtime_error), each copyable but not assignable.]
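A short sketch of catching these types around an ORC call; the thrown ParseError here is only a stand-in for whatever a real reader would raise.

```cpp
#include "orc/Exceptions.hh"

#include <iostream>

// The throw below is a placeholder for any ORC call that can fail; all three
// exception types ultimately derive from std::exception, so what() is available.
void safeOpen() {
  try {
    throw orc::ParseError("malformed postscript");
  } catch (const orc::ParseError& e) {
    std::cerr << "corrupt file: " << e.what() << "\n";
  } catch (const orc::InvalidArgument& e) {
    std::cerr << "bad argument: " << e.what() << "\n";
  } catch (const orc::NotImplementedYet& e) {
    std::cerr << "unsupported feature: " << e.what() << "\n";
  }
}
```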
diff --git a/contrib/libs/apache/orc/c++/include/orc/Int128.hh b/contrib/libs/apache/orc/c++/include/orc/Int128.hh
index 63b84478c6..f86d8f08a6 100644
@@ -1,372 +1,372 @@
[Whole-file hunk: all 372 lines removed and re-added with no visible content change. The header declares Int128, a signed 128-bit two's-complement integer whose arithmetic wraps on overflow, with +=, -=, *=, divide-with-remainder, shift, bitwise and comparison operators, hash(), fitsInLong()/toLong(), and toString()/toDecimalString()/toHexString(), plus the free functions scaleUpInt128ByPowerOfTen and scaleDownInt128ByPowerOfTen.]
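The operations above can be exercised directly; a small illustrative sketch (the quotient/remainder signs follow the divide() documentation):

```cpp
#include "orc/Int128.hh"

#include <iostream>

int main() {
  orc::Int128 a("170141183460469231731687303715884105727");  // 2^127 - 1
  a += orc::Int128(1);                 // wraps around silently
  std::cout << a.toString() << "\n";   // prints the minimum value

  orc::Int128 remainder;
  orc::Int128 quotient = orc::Int128(-21).divide(orc::Int128(5), remainder);
  std::cout << quotient.toString() << " r " << remainder.toString() << "\n";  // -4 r -1

  std::cout << orc::Int128(-1).toHexString() << "\n";           // 0xffff...ffff
  std::cout << orc::Int128(123456).toDecimalString(2) << "\n";  // 1234.56
  return 0;
}
```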
diff --git a/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh b/contrib/libs/apache/orc/c++/include/orc/MemoryPool.hh
index a34651721f..71d76c438a 100644
@@ -1,150 +1,150 @@
[Whole-file hunk: all 150 lines removed and re-added with no visible content change. The header declares the MemoryPool interface (malloc/free) with getDefaultPool(), and the DataBuffer<T> template (data, size, capacity, operator[], reserve, resize) together with its specializations and extern template instantiations for char, char*, double, Int128, int64_t, uint64_t and unsigned char.]
DataBuffer<unsigned char>; + + #ifdef __clang__ + #pragma clang diagnostic pop + #endif +} // namespace orc + + +#endif /* MEMORYPOOL_HH_ */ diff --git a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh index 541d725bfc..c64853168a 100644 --- a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh +++ b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh @@ -1,148 +1,148 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_FILE_HH -#define ORC_FILE_HH - -#include <string> - -#include "orc/orc-config.hh" -#include "orc/Reader.hh" -#include "orc/Writer.hh" - -/** /file orc/OrcFile.hh - @brief The top level interface to ORC. -*/ - -namespace orc { - - /** - * An abstract interface for providing ORC readers a stream of bytes. - */ - class InputStream { - public: - virtual ~InputStream(); - - /** - * Get the total length of the file in bytes. - */ - virtual uint64_t getLength() const = 0; - - /** - * Get the natural size for reads. - * @return the number of bytes that should be read at once - */ - virtual uint64_t getNaturalReadSize() const = 0; - - /** - * Read length bytes from the file starting at offset into - * the buffer starting at buf. - * @param buf the starting position of a buffer. - * @param length the number of bytes to read. - * @param offset the position in the stream to read from. - */ - virtual void read(void* buf, - uint64_t length, - uint64_t offset) = 0; - - /** - * Get the name of the stream for error messages. - */ - virtual const std::string& getName() const = 0; - }; - - /** - * An abstract interface for providing ORC writer a stream of bytes. - */ - class OutputStream { - public: - virtual ~OutputStream(); - - /** - * Get the total length of bytes written. - */ - virtual uint64_t getLength() const = 0; - - /** - * Get the natural size for reads. - * @return the number of bytes that should be written at once - */ - virtual uint64_t getNaturalWriteSize() const = 0; - - /** - * Write/Append length bytes pointed by buf to the file stream - * @param buf the starting position of a buffer. - * @param length the number of bytes to write. - */ - virtual void write(const void* buf, size_t length) = 0; - - /** - * Get the name of the stream for error messages. - */ - virtual const std::string& getName() const = 0; - - /** - * Close the stream and flush any pending data to the disk. - */ - virtual void close() = 0; - }; - - /** - * Create a stream to a local file or HDFS file if path begins with "hdfs://" - * @param path the name of the file in the local file system or HDFS - */ - ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path); - - /** - * Create a stream to a local file. 
- * @param path the name of the file in the local file system - */ - ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path); - - /** - * Create a stream to an HDFS file. - * @param path the uri of the file in HDFS - */ - ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path); - - /** - * Create a reader to read the ORC file. - * @param stream the stream to read - * @param options the options for reading the file - */ - ORC_UNIQUE_PTR<Reader> createReader(ORC_UNIQUE_PTR<InputStream> stream, - const ReaderOptions& options); - /** - * Create a stream to write to a local file. - * @param path the name of the file in the local file system - */ - ORC_UNIQUE_PTR<OutputStream> writeLocalFile(const std::string& path); - - /** - * Create a writer to write the ORC file. - * @param type the type of data to be written - * @param stream the stream to write to - * @param options the options for writing the file - */ - ORC_UNIQUE_PTR<Writer> createWriter( - const Type& type, - OutputStream* stream, - const WriterOptions& options); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_FILE_HH +#define ORC_FILE_HH + +#include <string> + +#include "orc/orc-config.hh" +#include "orc/Reader.hh" +#include "orc/Writer.hh" + +/** /file orc/OrcFile.hh + @brief The top level interface to ORC. +*/ + +namespace orc { + + /** + * An abstract interface for providing ORC readers a stream of bytes. + */ + class InputStream { + public: + virtual ~InputStream(); + + /** + * Get the total length of the file in bytes. + */ + virtual uint64_t getLength() const = 0; + + /** + * Get the natural size for reads. + * @return the number of bytes that should be read at once + */ + virtual uint64_t getNaturalReadSize() const = 0; + + /** + * Read length bytes from the file starting at offset into + * the buffer starting at buf. + * @param buf the starting position of a buffer. + * @param length the number of bytes to read. + * @param offset the position in the stream to read from. + */ + virtual void read(void* buf, + uint64_t length, + uint64_t offset) = 0; + + /** + * Get the name of the stream for error messages. + */ + virtual const std::string& getName() const = 0; + }; + + /** + * An abstract interface for providing ORC writer a stream of bytes. + */ + class OutputStream { + public: + virtual ~OutputStream(); + + /** + * Get the total length of bytes written. + */ + virtual uint64_t getLength() const = 0; + + /** + * Get the natural size for reads. + * @return the number of bytes that should be written at once + */ + virtual uint64_t getNaturalWriteSize() const = 0; + + /** + * Write/Append length bytes pointed by buf to the file stream + * @param buf the starting position of a buffer. 
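For context, a minimal read loop built on these factories. The RowReader, createRowBatch and next calls come from Reader.hh, which is only partially shown in this diff, and the path is a placeholder.

```cpp
#include "orc/OrcFile.hh"

#include <iostream>

int main() {
  // "/tmp/example.orc" is a placeholder path.
  orc::ReaderOptions readerOpts;
  ORC_UNIQUE_PTR<orc::Reader> reader =
      orc::createReader(orc::readLocalFile("/tmp/example.orc"), readerOpts);

  orc::RowReaderOptions rowOpts;
  ORC_UNIQUE_PTR<orc::RowReader> rowReader = reader->createRowReader(rowOpts);
  ORC_UNIQUE_PTR<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1024);

  uint64_t rows = 0;
  while (rowReader->next(*batch)) {   // fills 'batch' with up to 1024 rows
    rows += batch->numElements;
  }
  std::cout << "read " << rows << " rows\n";
  return 0;
}
```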
+ * @param length the number of bytes to write. + */ + virtual void write(const void* buf, size_t length) = 0; + + /** + * Get the name of the stream for error messages. + */ + virtual const std::string& getName() const = 0; + + /** + * Close the stream and flush any pending data to the disk. + */ + virtual void close() = 0; + }; + + /** + * Create a stream to a local file or HDFS file if path begins with "hdfs://" + * @param path the name of the file in the local file system or HDFS + */ + ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path); + + /** + * Create a stream to a local file. + * @param path the name of the file in the local file system + */ + ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path); + + /** + * Create a stream to an HDFS file. + * @param path the uri of the file in HDFS + */ + ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path); + + /** + * Create a reader to read the ORC file. + * @param stream the stream to read + * @param options the options for reading the file + */ + ORC_UNIQUE_PTR<Reader> createReader(ORC_UNIQUE_PTR<InputStream> stream, + const ReaderOptions& options); + /** + * Create a stream to write to a local file. + * @param path the name of the file in the local file system + */ + ORC_UNIQUE_PTR<OutputStream> writeLocalFile(const std::string& path); + + /** + * Create a writer to write the ORC file. + * @param type the type of data to be written + * @param stream the stream to write to + * @param options the options for writing the file + */ + ORC_UNIQUE_PTR<Writer> createWriter( + const Type& type, + OutputStream* stream, + const WriterOptions& options); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/Reader.hh b/contrib/libs/apache/orc/c++/include/orc/Reader.hh index 55c95557fc..5d9a532c11 100644 --- a/contrib/libs/apache/orc/c++/include/orc/Reader.hh +++ b/contrib/libs/apache/orc/c++/include/orc/Reader.hh @@ -1,550 +1,550 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_READER_HH -#define ORC_READER_HH - -#include "orc/BloomFilter.hh" -#include "orc/Common.hh" -#include "orc/orc-config.hh" -#include "orc/Statistics.hh" -#include "orc/Type.hh" -#include "orc/Vector.hh" - -#include <map> -#include <memory> -#include <set> -#include <string> -#include <vector> - -namespace orc { - - // classes that hold data members so we can maintain binary compatibility - struct ReaderOptionsPrivate; - struct RowReaderOptionsPrivate; - - /** - * Options for creating a Reader. 
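Taken together, the OrcFile.hh declarations above (readLocalFile, createReader, and the ReaderOptions type from Reader.hh) are typically combined along the following lines. This is a minimal sketch, assuming ORC_UNIQUE_PTR resolves to std::unique_ptr and using "example.orc" as a placeholder path; getNumberOfRows() is documented further down in Reader.hh.

    #include <orc/OrcFile.hh>

    #include <iostream>
    #include <memory>

    int main() {
      // Open a local ORC file and build a Reader with default options.
      orc::ReaderOptions options;
      std::unique_ptr<orc::Reader> reader =
          orc::createReader(orc::readLocalFile("example.orc"), options);

      // Query file-level metadata through the Reader interface.
      std::cout << reader->getNumberOfRows() << " rows" << std::endl;
      return 0;
    }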
- */ - class ReaderOptions { - private: - ORC_UNIQUE_PTR<ReaderOptionsPrivate> privateBits; - - public: - ReaderOptions(); - ReaderOptions(const ReaderOptions&); - ReaderOptions(ReaderOptions&); - ReaderOptions& operator=(const ReaderOptions&); - virtual ~ReaderOptions(); - - /** - * Set the stream to use for printing warning or error messages. - */ - ReaderOptions& setErrorStream(std::ostream& stream); - - /** - * Set a serialized copy of the file tail to be used when opening the file. - * - * When one process opens the file and other processes need to read - * the rows, we want to enable clients to just read the tail once. - * By passing the string returned by Reader.getSerializedFileTail(), to - * this function, the second reader will not need to read the file tail - * from disk. - * - * @param serialization the bytes of the serialized tail to use - */ - ReaderOptions& setSerializedFileTail(const std::string& serialization); - - /** - * Set the memory allocator. - */ - ReaderOptions& setMemoryPool(MemoryPool& pool); - - /** - * Set the location of the tail as defined by the logical length of the - * file. - */ - ReaderOptions& setTailLocation(uint64_t offset); - - /** - * Get the stream to write warnings or errors to. - */ - std::ostream* getErrorStream() const; - - /** - * Get the serialized file tail that the user passed in. - */ - std::string getSerializedFileTail() const; - - /** - * Get the desired tail location. - * @return if not set, return the maximum long. - */ - uint64_t getTailLocation() const; - - /** - * Get the memory allocator. - */ - MemoryPool* getMemoryPool() const; - }; - - /** - * Options for creating a RowReader. - */ - class RowReaderOptions { - private: - ORC_UNIQUE_PTR<RowReaderOptionsPrivate> privateBits; - - public: - RowReaderOptions(); - RowReaderOptions(const RowReaderOptions&); - RowReaderOptions(RowReaderOptions&); - RowReaderOptions& operator=(const RowReaderOptions&); - virtual ~RowReaderOptions(); - - /** - * For files that have structs as the top-level object, select the fields - * to read. The first field is 0, the second 1, and so on. By default, - * all columns are read. This option clears any previous setting of - * the selected columns. - * @param include a list of fields to read - * @return this - */ - RowReaderOptions& include(const std::list<uint64_t>& include); - - /** - * For files that have structs as the top-level object, select the fields - * to read by name. By default, all columns are read. This option clears - * any previous setting of the selected columns. - * @param include a list of fields to read - * @return this - */ - RowReaderOptions& include(const std::list<std::string>& include); - - /** - * Selects which type ids to read. The root type is always 0 and the - * rest of the types are labeled in a preorder traversal of the tree. - * The parent types are automatically selected, but the children are not. - * - * This option clears any previous setting of the selected columns or - * types. - * @param types a list of the type ids to read - * @return this - */ - RowReaderOptions& includeTypes(const std::list<uint64_t>& types); - - /** - * Set the section of the file to process. - * @param offset the starting byte offset - * @param length the number of bytes to read - * @return this - */ - RowReaderOptions& range(uint64_t offset, uint64_t length); - - /** - * For Hive 0.11 (and 0.12) decimals, the precision was unlimited - * and thus may overflow the 38 digits that is supported. 
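The include() and range() setters documented above can be used to project columns and limit the scanned byte range; a sketch under the assumption of a struct-rooted file, with arbitrary column names and an arbitrary 16 MB range:

    #include <orc/Reader.hh>

    #include <list>
    #include <string>

    orc::RowReaderOptions makeRowReaderOptions() {
      orc::RowReaderOptions rowOptions;
      // Read only the top-level fields "col0" and "col2" by name.
      rowOptions.include(std::list<std::string>{"col0", "col2"});
      // Restrict processing to the section of the file in bytes [0, 16 MB).
      rowOptions.range(0, 16 * 1024 * 1024);
      return rowOptions;
    }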
If one - * of the Hive 0.11 decimals is too large, the reader may either convert - * the value to NULL or throw an exception. That choice is controlled - * by this setting. - * - * Defaults to true. - * - * @param shouldThrow should the reader throw a ParseError? - * @return returns *this - */ - RowReaderOptions& throwOnHive11DecimalOverflow(bool shouldThrow); - - /** - * For Hive 0.11 (and 0.12) written decimals, which have unlimited - * scale and precision, the reader forces the scale to a consistent - * number that is configured. This setting changes the scale that is - * forced upon these old decimals. See also throwOnHive11DecimalOverflow. - * - * Defaults to 6. - * - * @param forcedScale the scale that will be forced on Hive 0.11 decimals - * @return returns *this - */ - RowReaderOptions& forcedScaleOnHive11Decimal(int32_t forcedScale); - - /** - * Set enable encoding block mode. - * By enable encoding block mode, Row Reader will not decode - * dictionary encoded string vector, but instead return an index array with - * reference to corresponding dictionary. - */ - RowReaderOptions& setEnableLazyDecoding(bool enable); - - /** - * Should enable encoding block mode - */ - bool getEnableLazyDecoding() const; - - /** - * Were the field ids set? - */ - bool getIndexesSet() const; - - /** - * Were the type ids set? - */ - bool getTypeIdsSet() const; - - /** - * Get the list of selected field or type ids to read. - */ - const std::list<uint64_t>& getInclude() const; - - /** - * Were the include names set? - */ - bool getNamesSet() const; - - /** - * Get the list of selected columns to read. All children of the selected - * columns are also selected. - */ - const std::list<std::string>& getIncludeNames() const; - - /** - * Get the start of the range for the data being processed. - * @return if not set, return 0 - */ - uint64_t getOffset() const; - - /** - * Get the end of the range for the data being processed. - * @return if not set, return the maximum long - */ - uint64_t getLength() const; - - /** - * Should the reader throw a ParseError when a Hive 0.11 decimal is - * larger than the supported 38 digits of precision? Otherwise, the - * data item is replaced by a NULL. - */ - bool getThrowOnHive11DecimalOverflow() const; - - /** - * What scale should all Hive 0.11 decimals be normalized to? - */ - int32_t getForcedScaleOnHive11Decimal() const; - }; - - - class RowReader; - - /** - * The interface for reading ORC file meta-data and constructing RowReaders. - * This is an an abstract class that will be subclassed as necessary. - */ - class Reader { - public: - virtual ~Reader(); - - /** - * Get the format version of the file. Currently known values are: - * 0.11 and 0.12 - * @return the FileVersion object - */ - virtual FileVersion getFormatVersion() const = 0; - - /** - * Get the number of rows in the file. - * @return the number of rows - */ - virtual uint64_t getNumberOfRows() const = 0; - - /** +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_READER_HH +#define ORC_READER_HH + +#include "orc/BloomFilter.hh" +#include "orc/Common.hh" +#include "orc/orc-config.hh" +#include "orc/Statistics.hh" +#include "orc/Type.hh" +#include "orc/Vector.hh" + +#include <map> +#include <memory> +#include <set> +#include <string> +#include <vector> + +namespace orc { + + // classes that hold data members so we can maintain binary compatibility + struct ReaderOptionsPrivate; + struct RowReaderOptionsPrivate; + + /** + * Options for creating a Reader. + */ + class ReaderOptions { + private: + ORC_UNIQUE_PTR<ReaderOptionsPrivate> privateBits; + + public: + ReaderOptions(); + ReaderOptions(const ReaderOptions&); + ReaderOptions(ReaderOptions&); + ReaderOptions& operator=(const ReaderOptions&); + virtual ~ReaderOptions(); + + /** + * Set the stream to use for printing warning or error messages. + */ + ReaderOptions& setErrorStream(std::ostream& stream); + + /** + * Set a serialized copy of the file tail to be used when opening the file. + * + * When one process opens the file and other processes need to read + * the rows, we want to enable clients to just read the tail once. + * By passing the string returned by Reader.getSerializedFileTail(), to + * this function, the second reader will not need to read the file tail + * from disk. + * + * @param serialization the bytes of the serialized tail to use + */ + ReaderOptions& setSerializedFileTail(const std::string& serialization); + + /** + * Set the memory allocator. + */ + ReaderOptions& setMemoryPool(MemoryPool& pool); + + /** + * Set the location of the tail as defined by the logical length of the + * file. + */ + ReaderOptions& setTailLocation(uint64_t offset); + + /** + * Get the stream to write warnings or errors to. + */ + std::ostream* getErrorStream() const; + + /** + * Get the serialized file tail that the user passed in. + */ + std::string getSerializedFileTail() const; + + /** + * Get the desired tail location. + * @return if not set, return the maximum long. + */ + uint64_t getTailLocation() const; + + /** + * Get the memory allocator. + */ + MemoryPool* getMemoryPool() const; + }; + + /** + * Options for creating a RowReader. + */ + class RowReaderOptions { + private: + ORC_UNIQUE_PTR<RowReaderOptionsPrivate> privateBits; + + public: + RowReaderOptions(); + RowReaderOptions(const RowReaderOptions&); + RowReaderOptions(RowReaderOptions&); + RowReaderOptions& operator=(const RowReaderOptions&); + virtual ~RowReaderOptions(); + + /** + * For files that have structs as the top-level object, select the fields + * to read. The first field is 0, the second 1, and so on. By default, + * all columns are read. This option clears any previous setting of + * the selected columns. + * @param include a list of fields to read + * @return this + */ + RowReaderOptions& include(const std::list<uint64_t>& include); + + /** + * For files that have structs as the top-level object, select the fields + * to read by name. By default, all columns are read. This option clears + * any previous setting of the selected columns. 
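The setSerializedFileTail()/getSerializedFileTail() pair supports the tail-sharing pattern described above, where one reader's parsed tail is handed to a second reader so it need not re-read the tail from disk. A sketch only; firstReader and the path are hypothetical inputs:

    #include <orc/OrcFile.hh>

    #include <memory>
    #include <string>

    std::unique_ptr<orc::Reader> reopenWithCachedTail(const orc::Reader& firstReader,
                                                      const std::string& path) {
      orc::ReaderOptions options;
      // Reuse the file tail that firstReader already read and parsed.
      options.setSerializedFileTail(firstReader.getSerializedFileTail());
      return orc::createReader(orc::readLocalFile(path), options);
    }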
+ * @param include a list of fields to read + * @return this + */ + RowReaderOptions& include(const std::list<std::string>& include); + + /** + * Selects which type ids to read. The root type is always 0 and the + * rest of the types are labeled in a preorder traversal of the tree. + * The parent types are automatically selected, but the children are not. + * + * This option clears any previous setting of the selected columns or + * types. + * @param types a list of the type ids to read + * @return this + */ + RowReaderOptions& includeTypes(const std::list<uint64_t>& types); + + /** + * Set the section of the file to process. + * @param offset the starting byte offset + * @param length the number of bytes to read + * @return this + */ + RowReaderOptions& range(uint64_t offset, uint64_t length); + + /** + * For Hive 0.11 (and 0.12) decimals, the precision was unlimited + * and thus may overflow the 38 digits that is supported. If one + * of the Hive 0.11 decimals is too large, the reader may either convert + * the value to NULL or throw an exception. That choice is controlled + * by this setting. + * + * Defaults to true. + * + * @param shouldThrow should the reader throw a ParseError? + * @return returns *this + */ + RowReaderOptions& throwOnHive11DecimalOverflow(bool shouldThrow); + + /** + * For Hive 0.11 (and 0.12) written decimals, which have unlimited + * scale and precision, the reader forces the scale to a consistent + * number that is configured. This setting changes the scale that is + * forced upon these old decimals. See also throwOnHive11DecimalOverflow. + * + * Defaults to 6. + * + * @param forcedScale the scale that will be forced on Hive 0.11 decimals + * @return returns *this + */ + RowReaderOptions& forcedScaleOnHive11Decimal(int32_t forcedScale); + + /** + * Set enable encoding block mode. + * By enable encoding block mode, Row Reader will not decode + * dictionary encoded string vector, but instead return an index array with + * reference to corresponding dictionary. + */ + RowReaderOptions& setEnableLazyDecoding(bool enable); + + /** + * Should enable encoding block mode + */ + bool getEnableLazyDecoding() const; + + /** + * Were the field ids set? + */ + bool getIndexesSet() const; + + /** + * Were the type ids set? + */ + bool getTypeIdsSet() const; + + /** + * Get the list of selected field or type ids to read. + */ + const std::list<uint64_t>& getInclude() const; + + /** + * Were the include names set? + */ + bool getNamesSet() const; + + /** + * Get the list of selected columns to read. All children of the selected + * columns are also selected. + */ + const std::list<std::string>& getIncludeNames() const; + + /** + * Get the start of the range for the data being processed. + * @return if not set, return 0 + */ + uint64_t getOffset() const; + + /** + * Get the end of the range for the data being processed. + * @return if not set, return the maximum long + */ + uint64_t getLength() const; + + /** + * Should the reader throw a ParseError when a Hive 0.11 decimal is + * larger than the supported 38 digits of precision? Otherwise, the + * data item is replaced by a NULL. + */ + bool getThrowOnHive11DecimalOverflow() const; + + /** + * What scale should all Hive 0.11 decimals be normalized to? + */ + int32_t getForcedScaleOnHive11Decimal() const; + }; + + + class RowReader; + + /** + * The interface for reading ORC file meta-data and constructing RowReaders. + * This is an an abstract class that will be subclassed as necessary. 
+ */ + class Reader { + public: + virtual ~Reader(); + + /** + * Get the format version of the file. Currently known values are: + * 0.11 and 0.12 + * @return the FileVersion object + */ + virtual FileVersion getFormatVersion() const = 0; + + /** + * Get the number of rows in the file. + * @return the number of rows + */ + virtual uint64_t getNumberOfRows() const = 0; + + /** * Get the software instance and version that wrote this file. * @return a user-facing string that specifies the software version */ virtual std::string getSoftwareVersion() const = 0; /** - * Get the user metadata keys. - * @return the set of user metadata keys - */ - virtual std::list<std::string> getMetadataKeys() const = 0; - - /** - * Get a user metadata value. - * @param key a key given by the user - * @return the bytes associated with the given key - */ - virtual std::string getMetadataValue(const std::string& key) const = 0; - - /** - * Did the user set the given metadata value. - * @param key the key to check - * @return true if the metadata value was set - */ - virtual bool hasMetadataValue(const std::string& key) const = 0; - - /** - * Get the compression kind. - * @return the kind of compression in the file - */ - virtual CompressionKind getCompression() const = 0; - - /** - * Get the buffer size for the compression. - * @return number of bytes to buffer for the compression codec. - */ - virtual uint64_t getCompressionSize() const = 0; - - /** - * Get ID of writer that generated the file. - * @return UNKNOWN_WRITER if the writer ID is undefined - */ - virtual WriterId getWriterId() const = 0; - - /** - * Get the writer id value when getWriterId() returns an unknown writer. - * @return the integer value of the writer ID. - */ - virtual uint32_t getWriterIdValue() const = 0; - - /** - * Get the version of the writer. - * @return the version of the writer. - */ - virtual WriterVersion getWriterVersion() const = 0; - - /** - * Get the number of rows per an entry in the row index. - * @return the number of rows per an entry in the row index or 0 if there - * is no row index. - */ - virtual uint64_t getRowIndexStride() const = 0; - - /** - * Get the number of stripes in the file. - * @return the number of stripes - */ - virtual uint64_t getNumberOfStripes() const = 0; - - /** - * Get the information about a stripe. - * @param stripeIndex the index of the stripe (0 to N-1) to get information about - * @return the information about that stripe - */ - virtual ORC_UNIQUE_PTR<StripeInformation> - getStripe(uint64_t stripeIndex) const = 0; - - /** - * Get the number of stripe statistics in the file. - * @return the number of stripe statistics - */ - virtual uint64_t getNumberOfStripeStatistics() const = 0; - - /** - * Get the statistics about a stripe. - * @param stripeIndex the index of the stripe (0 to N-1) to get statistics about - * @return the statistics about that stripe - */ - virtual ORC_UNIQUE_PTR<StripeStatistics> - getStripeStatistics(uint64_t stripeIndex) const = 0; - - /** - * Get the length of the data stripes in the file. - * @return the number of bytes in stripes - */ - virtual uint64_t getContentLength() const = 0; - - /** - * Get the length of the file stripe statistics. - * @return the number of compressed bytes in the file stripe statistics - */ - virtual uint64_t getStripeStatisticsLength() const = 0; - - /** - * Get the length of the file footer. 
- * @return the number of compressed bytes in the file footer - */ - virtual uint64_t getFileFooterLength() const = 0; - - /** - * Get the length of the file postscript. - * @return the number of bytes in the file postscript - */ - virtual uint64_t getFilePostscriptLength() const = 0; - - /** - * Get the total length of the file. - * @return the number of bytes in the file - */ - virtual uint64_t getFileLength() const = 0; - - /** - * Get the statistics about the columns in the file. - * @return the information about the column - */ - virtual ORC_UNIQUE_PTR<Statistics> getStatistics() const = 0; - - /** - * Get the statistics about a single column in the file. - * @param columnId id of the column - * @return the information about the column - */ - virtual ORC_UNIQUE_PTR<ColumnStatistics> - getColumnStatistics(uint32_t columnId) const = 0; - - /** - * Check if the file has correct column statistics. - */ - virtual bool hasCorrectStatistics() const = 0; - - /** - * Get the serialized file tail. - * Usefull if another reader of the same file wants to avoid re-reading - * the file tail. See ReaderOptions.setSerializedFileTail(). - * @return a string of bytes with the file tail - */ - virtual std::string getSerializedFileTail() const = 0; - - /** - * Get the type of the rows in the file. The top level is typically a - * struct. - * @return the root type - */ - virtual const Type& getType() const = 0; - - /** - * Create a RowReader based on this reader with the default options. - * @return a RowReader to read the rows - */ - virtual ORC_UNIQUE_PTR<RowReader> createRowReader() const = 0; - - /** - * Create a RowReader based on this reader. - * @param options RowReader Options - * @return a RowReader to read the rows - */ - virtual ORC_UNIQUE_PTR<RowReader> createRowReader(const RowReaderOptions& options) const = 0; - - /** - * Get the name of the input stream. - */ - virtual const std::string& getStreamName() const = 0; - - /** - * Estimate an upper bound on heap memory allocation by the Reader - * based on the information in the file footer. - * The bound is less tight if only few columns are read or compression is - * used. - */ - /** - * @param stripeIx index of the stripe to be read (if not specified, - * all stripes are considered). - * @return upper bound on memory use by all columns - */ - virtual uint64_t getMemoryUse(int stripeIx=-1) = 0; - - /** - * @param include Column Field Ids - * @param stripeIx index of the stripe to be read (if not specified, - * all stripes are considered). - * @return upper bound on memory use by selected columns - */ - virtual uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) = 0; - - /** - * @param names Column Names - * @param stripeIx index of the stripe to be read (if not specified, - * all stripes are considered). - * @return upper bound on memory use by selected columns - */ - virtual uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) = 0; - - /** - * @param include Column Type Ids - * @param stripeIx index of the stripe to be read (if not specified, - * all stripes are considered). - * @return upper bound on memory use by selected columns - */ - virtual uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) = 0; - - /** - * Get BloomFiters of all selected columns in the specified stripe - * @param stripeIndex index of the stripe to be read for bloom filters. 
- * @param included index of selected columns to return (if not specified, - * all columns that have bloom filters are considered). - * @return map of bloom filters with the key standing for the index of column. - */ - virtual std::map<uint32_t, BloomFilterIndex> - getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const = 0; - }; - - /** - * The interface for reading rows in ORC files. - * This is an an abstract class that will be subclassed as necessary. - */ - class RowReader { - public: - virtual ~RowReader(); - /** - * Get the selected type of the rows in the file. The file's row type - * is projected down to just the selected columns. Thus, if the file's - * type is struct<col0:int,col1:double,col2:string> and the selected - * columns are "col0,col2" the selected type would be - * struct<col0:int,col2:string>. - * @return the root type - */ - virtual const Type& getSelectedType() const = 0; - - /** - * Get the selected columns of the file. - */ - virtual const std::vector<bool> getSelectedColumns() const = 0; - - /** - * Create a row batch for reading the selected columns of this file. - * @param size the number of rows to read - * @return a new ColumnVectorBatch to read into - */ - virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size - ) const = 0; - - /** - * Read the next row batch from the current position. - * Caller must look at numElements in the row batch to determine how - * many rows were read. - * @param data the row batch to read into. - * @return true if a non-zero number of rows were read or false if the - * end of the file was reached. - */ - virtual bool next(ColumnVectorBatch& data) = 0; - - /** - * Get the row number of the first row in the previously read batch. - * @return the row number of the previous batch. - */ - virtual uint64_t getRowNumber() const = 0; - - /** - * Seek to a given row. - * @param rowNumber the next row the reader should return - */ - virtual void seekToRow(uint64_t rowNumber) = 0; - - }; -} - -#endif + * Get the user metadata keys. + * @return the set of user metadata keys + */ + virtual std::list<std::string> getMetadataKeys() const = 0; + + /** + * Get a user metadata value. + * @param key a key given by the user + * @return the bytes associated with the given key + */ + virtual std::string getMetadataValue(const std::string& key) const = 0; + + /** + * Did the user set the given metadata value. + * @param key the key to check + * @return true if the metadata value was set + */ + virtual bool hasMetadataValue(const std::string& key) const = 0; + + /** + * Get the compression kind. + * @return the kind of compression in the file + */ + virtual CompressionKind getCompression() const = 0; + + /** + * Get the buffer size for the compression. + * @return number of bytes to buffer for the compression codec. + */ + virtual uint64_t getCompressionSize() const = 0; + + /** + * Get ID of writer that generated the file. + * @return UNKNOWN_WRITER if the writer ID is undefined + */ + virtual WriterId getWriterId() const = 0; + + /** + * Get the writer id value when getWriterId() returns an unknown writer. + * @return the integer value of the writer ID. + */ + virtual uint32_t getWriterIdValue() const = 0; + + /** + * Get the version of the writer. + * @return the version of the writer. + */ + virtual WriterVersion getWriterVersion() const = 0; + + /** + * Get the number of rows per an entry in the row index. + * @return the number of rows per an entry in the row index or 0 if there + * is no row index. 
+ */ + virtual uint64_t getRowIndexStride() const = 0; + + /** + * Get the number of stripes in the file. + * @return the number of stripes + */ + virtual uint64_t getNumberOfStripes() const = 0; + + /** + * Get the information about a stripe. + * @param stripeIndex the index of the stripe (0 to N-1) to get information about + * @return the information about that stripe + */ + virtual ORC_UNIQUE_PTR<StripeInformation> + getStripe(uint64_t stripeIndex) const = 0; + + /** + * Get the number of stripe statistics in the file. + * @return the number of stripe statistics + */ + virtual uint64_t getNumberOfStripeStatistics() const = 0; + + /** + * Get the statistics about a stripe. + * @param stripeIndex the index of the stripe (0 to N-1) to get statistics about + * @return the statistics about that stripe + */ + virtual ORC_UNIQUE_PTR<StripeStatistics> + getStripeStatistics(uint64_t stripeIndex) const = 0; + + /** + * Get the length of the data stripes in the file. + * @return the number of bytes in stripes + */ + virtual uint64_t getContentLength() const = 0; + + /** + * Get the length of the file stripe statistics. + * @return the number of compressed bytes in the file stripe statistics + */ + virtual uint64_t getStripeStatisticsLength() const = 0; + + /** + * Get the length of the file footer. + * @return the number of compressed bytes in the file footer + */ + virtual uint64_t getFileFooterLength() const = 0; + + /** + * Get the length of the file postscript. + * @return the number of bytes in the file postscript + */ + virtual uint64_t getFilePostscriptLength() const = 0; + + /** + * Get the total length of the file. + * @return the number of bytes in the file + */ + virtual uint64_t getFileLength() const = 0; + + /** + * Get the statistics about the columns in the file. + * @return the information about the column + */ + virtual ORC_UNIQUE_PTR<Statistics> getStatistics() const = 0; + + /** + * Get the statistics about a single column in the file. + * @param columnId id of the column + * @return the information about the column + */ + virtual ORC_UNIQUE_PTR<ColumnStatistics> + getColumnStatistics(uint32_t columnId) const = 0; + + /** + * Check if the file has correct column statistics. + */ + virtual bool hasCorrectStatistics() const = 0; + + /** + * Get the serialized file tail. + * Usefull if another reader of the same file wants to avoid re-reading + * the file tail. See ReaderOptions.setSerializedFileTail(). + * @return a string of bytes with the file tail + */ + virtual std::string getSerializedFileTail() const = 0; + + /** + * Get the type of the rows in the file. The top level is typically a + * struct. + * @return the root type + */ + virtual const Type& getType() const = 0; + + /** + * Create a RowReader based on this reader with the default options. + * @return a RowReader to read the rows + */ + virtual ORC_UNIQUE_PTR<RowReader> createRowReader() const = 0; + + /** + * Create a RowReader based on this reader. + * @param options RowReader Options + * @return a RowReader to read the rows + */ + virtual ORC_UNIQUE_PTR<RowReader> createRowReader(const RowReaderOptions& options) const = 0; + + /** + * Get the name of the input stream. + */ + virtual const std::string& getStreamName() const = 0; + + /** + * Estimate an upper bound on heap memory allocation by the Reader + * based on the information in the file footer. + * The bound is less tight if only few columns are read or compression is + * used. 
+ */ + /** + * @param stripeIx index of the stripe to be read (if not specified, + * all stripes are considered). + * @return upper bound on memory use by all columns + */ + virtual uint64_t getMemoryUse(int stripeIx=-1) = 0; + + /** + * @param include Column Field Ids + * @param stripeIx index of the stripe to be read (if not specified, + * all stripes are considered). + * @return upper bound on memory use by selected columns + */ + virtual uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) = 0; + + /** + * @param names Column Names + * @param stripeIx index of the stripe to be read (if not specified, + * all stripes are considered). + * @return upper bound on memory use by selected columns + */ + virtual uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) = 0; + + /** + * @param include Column Type Ids + * @param stripeIx index of the stripe to be read (if not specified, + * all stripes are considered). + * @return upper bound on memory use by selected columns + */ + virtual uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) = 0; + + /** + * Get BloomFiters of all selected columns in the specified stripe + * @param stripeIndex index of the stripe to be read for bloom filters. + * @param included index of selected columns to return (if not specified, + * all columns that have bloom filters are considered). + * @return map of bloom filters with the key standing for the index of column. + */ + virtual std::map<uint32_t, BloomFilterIndex> + getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const = 0; + }; + + /** + * The interface for reading rows in ORC files. + * This is an an abstract class that will be subclassed as necessary. + */ + class RowReader { + public: + virtual ~RowReader(); + /** + * Get the selected type of the rows in the file. The file's row type + * is projected down to just the selected columns. Thus, if the file's + * type is struct<col0:int,col1:double,col2:string> and the selected + * columns are "col0,col2" the selected type would be + * struct<col0:int,col2:string>. + * @return the root type + */ + virtual const Type& getSelectedType() const = 0; + + /** + * Get the selected columns of the file. + */ + virtual const std::vector<bool> getSelectedColumns() const = 0; + + /** + * Create a row batch for reading the selected columns of this file. + * @param size the number of rows to read + * @return a new ColumnVectorBatch to read into + */ + virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size + ) const = 0; + + /** + * Read the next row batch from the current position. + * Caller must look at numElements in the row batch to determine how + * many rows were read. + * @param data the row batch to read into. + * @return true if a non-zero number of rows were read or false if the + * end of the file was reached. + */ + virtual bool next(ColumnVectorBatch& data) = 0; + + /** + * Get the row number of the first row in the previously read batch. + * @return the row number of the previous batch. + */ + virtual uint64_t getRowNumber() const = 0; + + /** + * Seek to a given row. 
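The RowReader methods documented here imply the usual read loop sketched below: create a batch once with createRowBatch(), then call next() until it returns false. The batch size of 1024 is an arbitrary choice, and the Reader and RowReaderOptions are assumed to come from the earlier sketches.

    #include <orc/OrcFile.hh>

    #include <memory>

    void readAllRows(orc::Reader& reader, const orc::RowReaderOptions& rowOptions) {
      std::unique_ptr<orc::RowReader> rowReader = reader.createRowReader(rowOptions);
      std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
      while (rowReader->next(*batch)) {
        // batch->numElements rows are now populated; process them here.
      }
    }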
+ * @param rowNumber the next row the reader should return + */ + virtual void seekToRow(uint64_t rowNumber) = 0; + + }; +} + +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/Statistics.hh b/contrib/libs/apache/orc/c++/include/orc/Statistics.hh index c7da63a542..1d4b0b6558 100644 --- a/contrib/libs/apache/orc/c++/include/orc/Statistics.hh +++ b/contrib/libs/apache/orc/c++/include/orc/Statistics.hh @@ -1,400 +1,400 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_STATISTICS_HH -#define ORC_STATISTICS_HH - -#include "orc/orc-config.hh" -#include "orc/Type.hh" -#include "orc/Vector.hh" - -namespace orc { - - /** - * Statistics that are available for all types of columns. - */ - class ColumnStatistics { - public: - virtual ~ColumnStatistics(); - - /** - * Get the number of values in this column. It will differ from the number - * of rows because of NULL values. - * @return the number of values - */ - virtual uint64_t getNumberOfValues() const = 0; - - /** - * Check whether column has null value. - * @return true if has null value - */ - virtual bool hasNull() const = 0; - - /** - * Print out statistics of column if any. - */ - virtual std::string toString() const = 0; - }; - - /** - * Statistics for binary columns. - */ - class BinaryColumnStatistics: public ColumnStatistics { - public: - virtual ~BinaryColumnStatistics(); - - /** - * Check whether column has total length. - * @return true if has total length - */ - virtual bool hasTotalLength() const = 0; - - virtual uint64_t getTotalLength() const = 0; - }; - - /** - * Statistics for boolean columns. - */ - class BooleanColumnStatistics: public ColumnStatistics { - public: - virtual ~BooleanColumnStatistics(); - - /** - * Check whether column has true/false count. - * @return true if has true/false count - */ - virtual bool hasCount() const = 0; - - virtual uint64_t getFalseCount() const = 0; - virtual uint64_t getTrueCount() const = 0; - }; - - /** - * Statistics for date columns. - */ - class DateColumnStatistics: public ColumnStatistics { - public: - virtual ~DateColumnStatistics(); - - /** - * Check whether column has minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column has maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Get the minimum value for the column. - * @return minimum value - */ - virtual int32_t getMinimum() const = 0; - - /** - * Get the maximum value for the column. - * @return maximum value - */ - virtual int32_t getMaximum() const = 0; - }; - - /** - * Statistics for decimal columns. 
- */ - class DecimalColumnStatistics: public ColumnStatistics { - public: - virtual ~DecimalColumnStatistics(); - - /** - * Check whether column has minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column has maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Check whether column has sum. - * @return true if has sum - */ - virtual bool hasSum() const = 0; - - /** - * Get the minimum value for the column. - * @return minimum value - */ - virtual Decimal getMinimum() const = 0; - - /** - * Get the maximum value for the column. - * @return maximum value - */ - virtual Decimal getMaximum() const = 0; - - /** - * Get the sum for the column. - * @return sum of all the values - */ - virtual Decimal getSum() const = 0; - }; - - /** - * Statistics for float and double columns. - */ - class DoubleColumnStatistics: public ColumnStatistics { - public: - virtual ~DoubleColumnStatistics(); - - /** - * Check whether column has minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column has maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Check whether column has sum. - * @return true if has sum - */ - virtual bool hasSum() const = 0; - - /** - * Get the smallest value in the column. Only defined if getNumberOfValues - * is non-zero. - * @return the minimum - */ - virtual double getMinimum() const = 0; - - /** - * Get the largest value in the column. Only defined if getNumberOfValues - * is non-zero. - * @return the maximum - */ - virtual double getMaximum() const = 0; - - /** - * Get the sum of the values in the column. - * @return the sum - */ - virtual double getSum() const = 0; - }; - - /** - * Statistics for all of the integer columns, such as byte, short, int, and - * long. - */ - class IntegerColumnStatistics: public ColumnStatistics { - public: - virtual ~IntegerColumnStatistics(); - - /** - * Check whether column has minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column has maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Check whether column has sum. - * @return true if has sum - */ - virtual bool hasSum() const = 0; - - /** - * Get the smallest value in the column. Only defined if getNumberOfValues - * is non-zero. - * @return the minimum - */ - virtual int64_t getMinimum() const = 0; - - /** - * Get the largest value in the column. Only defined if getNumberOfValues - * is non-zero. - * @return the maximum - */ - virtual int64_t getMaximum() const = 0; - - /** - * Get the sum of the column. Only valid if isSumDefined returns true. - * @return the sum of the column - */ - virtual int64_t getSum() const = 0; - }; - - /** - * Statistics for string columns. - */ - class StringColumnStatistics: public ColumnStatistics { - public: - virtual ~StringColumnStatistics(); - - /** - * Check whether column has minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column has maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Check whether column has total length. - * @return true if has total length - */ - virtual bool hasTotalLength() const = 0; - - /** - * Get the minimum value for the column. 
- * @return minimum value - */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_STATISTICS_HH +#define ORC_STATISTICS_HH + +#include "orc/orc-config.hh" +#include "orc/Type.hh" +#include "orc/Vector.hh" + +namespace orc { + + /** + * Statistics that are available for all types of columns. + */ + class ColumnStatistics { + public: + virtual ~ColumnStatistics(); + + /** + * Get the number of values in this column. It will differ from the number + * of rows because of NULL values. + * @return the number of values + */ + virtual uint64_t getNumberOfValues() const = 0; + + /** + * Check whether column has null value. + * @return true if has null value + */ + virtual bool hasNull() const = 0; + + /** + * Print out statistics of column if any. + */ + virtual std::string toString() const = 0; + }; + + /** + * Statistics for binary columns. + */ + class BinaryColumnStatistics: public ColumnStatistics { + public: + virtual ~BinaryColumnStatistics(); + + /** + * Check whether column has total length. + * @return true if has total length + */ + virtual bool hasTotalLength() const = 0; + + virtual uint64_t getTotalLength() const = 0; + }; + + /** + * Statistics for boolean columns. + */ + class BooleanColumnStatistics: public ColumnStatistics { + public: + virtual ~BooleanColumnStatistics(); + + /** + * Check whether column has true/false count. + * @return true if has true/false count + */ + virtual bool hasCount() const = 0; + + virtual uint64_t getFalseCount() const = 0; + virtual uint64_t getTrueCount() const = 0; + }; + + /** + * Statistics for date columns. + */ + class DateColumnStatistics: public ColumnStatistics { + public: + virtual ~DateColumnStatistics(); + + /** + * Check whether column has minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column has maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Get the minimum value for the column. + * @return minimum value + */ + virtual int32_t getMinimum() const = 0; + + /** + * Get the maximum value for the column. + * @return maximum value + */ + virtual int32_t getMaximum() const = 0; + }; + + /** + * Statistics for decimal columns. + */ + class DecimalColumnStatistics: public ColumnStatistics { + public: + virtual ~DecimalColumnStatistics(); + + /** + * Check whether column has minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column has maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Check whether column has sum. + * @return true if has sum + */ + virtual bool hasSum() const = 0; + + /** + * Get the minimum value for the column. 
+ * @return minimum value + */ + virtual Decimal getMinimum() const = 0; + + /** + * Get the maximum value for the column. + * @return maximum value + */ + virtual Decimal getMaximum() const = 0; + + /** + * Get the sum for the column. + * @return sum of all the values + */ + virtual Decimal getSum() const = 0; + }; + + /** + * Statistics for float and double columns. + */ + class DoubleColumnStatistics: public ColumnStatistics { + public: + virtual ~DoubleColumnStatistics(); + + /** + * Check whether column has minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column has maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Check whether column has sum. + * @return true if has sum + */ + virtual bool hasSum() const = 0; + + /** + * Get the smallest value in the column. Only defined if getNumberOfValues + * is non-zero. + * @return the minimum + */ + virtual double getMinimum() const = 0; + + /** + * Get the largest value in the column. Only defined if getNumberOfValues + * is non-zero. + * @return the maximum + */ + virtual double getMaximum() const = 0; + + /** + * Get the sum of the values in the column. + * @return the sum + */ + virtual double getSum() const = 0; + }; + + /** + * Statistics for all of the integer columns, such as byte, short, int, and + * long. + */ + class IntegerColumnStatistics: public ColumnStatistics { + public: + virtual ~IntegerColumnStatistics(); + + /** + * Check whether column has minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column has maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Check whether column has sum. + * @return true if has sum + */ + virtual bool hasSum() const = 0; + + /** + * Get the smallest value in the column. Only defined if getNumberOfValues + * is non-zero. + * @return the minimum + */ + virtual int64_t getMinimum() const = 0; + + /** + * Get the largest value in the column. Only defined if getNumberOfValues + * is non-zero. + * @return the maximum + */ + virtual int64_t getMaximum() const = 0; + + /** + * Get the sum of the column. Only valid if isSumDefined returns true. + * @return the sum of the column + */ + virtual int64_t getSum() const = 0; + }; + + /** + * Statistics for string columns. + */ + class StringColumnStatistics: public ColumnStatistics { + public: + virtual ~StringColumnStatistics(); + + /** + * Check whether column has minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column has maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Check whether column has total length. + * @return true if has total length + */ + virtual bool hasTotalLength() const = 0; + + /** + * Get the minimum value for the column. + * @return minimum value + */ virtual const std::string & getMinimum() const = 0; - - /** - * Get the maximum value for the column. - * @return maximum value - */ + + /** + * Get the maximum value for the column. + * @return maximum value + */ virtual const std::string & getMaximum() const = 0; - - /** - * Get the total length of all values. - * @return total length of all the values - */ - virtual uint64_t getTotalLength() const = 0; - }; - - /** - * Statistics for timestamp columns. 
- */ - class TimestampColumnStatistics: public ColumnStatistics { - public: - virtual ~TimestampColumnStatistics(); - - /** - * Check whether column minimum. - * @return true if has minimum - */ - virtual bool hasMinimum() const = 0; - - /** - * Check whether column maximum. - * @return true if has maximum - */ - virtual bool hasMaximum() const = 0; - - /** - * Get the minimum value for the column. - * @return minimum value - */ - virtual int64_t getMinimum() const = 0; - - /** - * Get the maximum value for the column. - * @return maximum value - */ - virtual int64_t getMaximum() const = 0; - - /** - * Check whether column has a lowerBound. - * @return true if column has a lowerBound - */ - virtual bool hasLowerBound() const = 0; - - /** - * Check whether column has an upperBound. - * @return true if column has an upperBound - */ - virtual bool hasUpperBound() const = 0; - - /** - * Get the lowerBound value for the column. - * @return lowerBound value - */ - virtual int64_t getLowerBound() const = 0; - - /** - * Get the upperBound value for the column. - * @return upperBound value - */ - virtual int64_t getUpperBound() const = 0; - - - }; - - class Statistics { - public: - virtual ~Statistics(); - - /** - * Get the statistics of the given column. - * @param colId id of the column - * @return one column's statistics - */ - virtual const ColumnStatistics* getColumnStatistics(uint32_t colId - ) const = 0; - - /** - * Get the number of columns. - * @return the number of columns - */ - virtual uint32_t getNumberOfColumns() const = 0; - }; - - class StripeStatistics : public Statistics { - public: - virtual ~StripeStatistics(); - - /** - * Get the statistics of a given RowIndex entry in a given column. - * @param columnId id of the column - * @param rowIndexId RowIndex entry id - * @return statistics of the given RowIndex entry - */ - virtual const ColumnStatistics* - getRowIndexStatistics( - uint32_t columnId, uint32_t rowIndexId) const = 0; - - /** - * Get the number of RowIndex statistics in a given column. - * @param columnId id of the column - * @return the number of RowIndex statistics - */ - virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0; - }; -} - -#endif + + /** + * Get the total length of all values. + * @return total length of all the values + */ + virtual uint64_t getTotalLength() const = 0; + }; + + /** + * Statistics for timestamp columns. + */ + class TimestampColumnStatistics: public ColumnStatistics { + public: + virtual ~TimestampColumnStatistics(); + + /** + * Check whether column minimum. + * @return true if has minimum + */ + virtual bool hasMinimum() const = 0; + + /** + * Check whether column maximum. + * @return true if has maximum + */ + virtual bool hasMaximum() const = 0; + + /** + * Get the minimum value for the column. + * @return minimum value + */ + virtual int64_t getMinimum() const = 0; + + /** + * Get the maximum value for the column. + * @return maximum value + */ + virtual int64_t getMaximum() const = 0; + + /** + * Check whether column has a lowerBound. + * @return true if column has a lowerBound + */ + virtual bool hasLowerBound() const = 0; + + /** + * Check whether column has an upperBound. + * @return true if column has an upperBound + */ + virtual bool hasUpperBound() const = 0; + + /** + * Get the lowerBound value for the column. + * @return lowerBound value + */ + virtual int64_t getLowerBound() const = 0; + + /** + * Get the upperBound value for the column. 
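For the Statistics/ColumnStatistics hierarchy above, a minimal sketch of dumping file-level statistics, using only getStatistics(), getNumberOfColumns(), getColumnStatistics() and toString() as documented:

    #include <orc/OrcFile.hh>

    #include <cstdint>
    #include <iostream>
    #include <memory>

    void printColumnStatistics(const orc::Reader& reader) {
      std::unique_ptr<orc::Statistics> stats = reader.getStatistics();
      for (uint32_t col = 0; col < stats->getNumberOfColumns(); ++col) {
        // toString() renders whichever typed statistics the column carries.
        std::cout << "column " << col << ": "
                  << stats->getColumnStatistics(col)->toString() << std::endl;
      }
    }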
+ * @return upperBound value + */ + virtual int64_t getUpperBound() const = 0; + + + }; + + class Statistics { + public: + virtual ~Statistics(); + + /** + * Get the statistics of the given column. + * @param colId id of the column + * @return one column's statistics + */ + virtual const ColumnStatistics* getColumnStatistics(uint32_t colId + ) const = 0; + + /** + * Get the number of columns. + * @return the number of columns + */ + virtual uint32_t getNumberOfColumns() const = 0; + }; + + class StripeStatistics : public Statistics { + public: + virtual ~StripeStatistics(); + + /** + * Get the statistics of a given RowIndex entry in a given column. + * @param columnId id of the column + * @param rowIndexId RowIndex entry id + * @return statistics of the given RowIndex entry + */ + virtual const ColumnStatistics* + getRowIndexStatistics( + uint32_t columnId, uint32_t rowIndexId) const = 0; + + /** + * Get the number of RowIndex statistics in a given column. + * @param columnId id of the column + * @return the number of RowIndex statistics + */ + virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0; + }; +} + +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/Type.hh b/contrib/libs/apache/orc/c++/include/orc/Type.hh index ba0f87e9b2..c0cbf2d671 100644 --- a/contrib/libs/apache/orc/c++/include/orc/Type.hh +++ b/contrib/libs/apache/orc/c++/include/orc/Type.hh @@ -1,111 +1,111 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_TYPE_HH -#define ORC_TYPE_HH - -#include "orc/orc-config.hh" -#include "orc/Vector.hh" -#include "MemoryPool.hh" - -namespace orc { - - enum TypeKind { - BOOLEAN = 0, - BYTE = 1, - SHORT = 2, - INT = 3, - LONG = 4, - FLOAT = 5, - DOUBLE = 6, - STRING = 7, - BINARY = 8, - TIMESTAMP = 9, - LIST = 10, - MAP = 11, - STRUCT = 12, - UNION = 13, - DECIMAL = 14, - DATE = 15, - VARCHAR = 16, - CHAR = 17 - }; - - class Type { - public: - virtual ~Type(); - virtual uint64_t getColumnId() const = 0; - virtual uint64_t getMaximumColumnId() const = 0; - virtual TypeKind getKind() const = 0; - virtual uint64_t getSubtypeCount() const = 0; - virtual const Type* getSubtype(uint64_t childId) const = 0; - virtual const std::string& getFieldName(uint64_t childId) const = 0; - virtual uint64_t getMaximumLength() const = 0; - virtual uint64_t getPrecision() const = 0; - virtual uint64_t getScale() const = 0; - virtual std::string toString() const = 0; - - /** - * Create a row batch for this type. - */ - virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size, - MemoryPool& pool, - bool encoded = false - ) const = 0; - - /** - * Add a new field to a struct type. 
- * @param fieldName the name of the new field - * @param fieldType the type of the new field - * @return a reference to the struct type - */ - virtual Type* addStructField(const std::string& fieldName, - ORC_UNIQUE_PTR<Type> fieldType) = 0; - - /** - * Add a new child to a union type. - * @param fieldType the type of the new field - * @return a reference to the union type - */ - virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0; - - /** - * Build a Type object from string text representation. - */ - static ORC_UNIQUE_PTR<Type> buildTypeFromString(const std::string& input); - }; - - const int64_t DEFAULT_DECIMAL_SCALE = 18; - const int64_t DEFAULT_DECIMAL_PRECISION = 38; - - ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind); - ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind, - uint64_t maxLength); - ORC_UNIQUE_PTR<Type> - createDecimalType(uint64_t precision= - DEFAULT_DECIMAL_PRECISION, - uint64_t scale=DEFAULT_DECIMAL_SCALE); - - ORC_UNIQUE_PTR<Type> createStructType(); - ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements); - ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key, - ORC_UNIQUE_PTR<Type> value); - ORC_UNIQUE_PTR<Type> createUnionType(); - -} -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_TYPE_HH +#define ORC_TYPE_HH + +#include "orc/orc-config.hh" +#include "orc/Vector.hh" +#include "MemoryPool.hh" + +namespace orc { + + enum TypeKind { + BOOLEAN = 0, + BYTE = 1, + SHORT = 2, + INT = 3, + LONG = 4, + FLOAT = 5, + DOUBLE = 6, + STRING = 7, + BINARY = 8, + TIMESTAMP = 9, + LIST = 10, + MAP = 11, + STRUCT = 12, + UNION = 13, + DECIMAL = 14, + DATE = 15, + VARCHAR = 16, + CHAR = 17 + }; + + class Type { + public: + virtual ~Type(); + virtual uint64_t getColumnId() const = 0; + virtual uint64_t getMaximumColumnId() const = 0; + virtual TypeKind getKind() const = 0; + virtual uint64_t getSubtypeCount() const = 0; + virtual const Type* getSubtype(uint64_t childId) const = 0; + virtual const std::string& getFieldName(uint64_t childId) const = 0; + virtual uint64_t getMaximumLength() const = 0; + virtual uint64_t getPrecision() const = 0; + virtual uint64_t getScale() const = 0; + virtual std::string toString() const = 0; + + /** + * Create a row batch for this type. + */ + virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size, + MemoryPool& pool, + bool encoded = false + ) const = 0; + + /** + * Add a new field to a struct type. + * @param fieldName the name of the new field + * @param fieldType the type of the new field + * @return a reference to the struct type + */ + virtual Type* addStructField(const std::string& fieldName, + ORC_UNIQUE_PTR<Type> fieldType) = 0; + + /** + * Add a new child to a union type. 
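On the write side, the Type factory functions above combine with writeLocalFile() and createWriter() from OrcFile.hh roughly as sketched below. Assumptions: "out.orc" is a placeholder path, WriterOptions comes from Writer.hh (not shown in this diff) with default settings, and Writer::close() is assumed to flush the footer before the stream is destroyed.

    #include <orc/OrcFile.hh>

    #include <memory>

    void createEmptyOrcFile() {
      // struct<col0:int,col2:string> mirrors the projection used in the read sketches.
      std::unique_ptr<orc::Type> schema =
          orc::Type::buildTypeFromString("struct<col0:int,col2:string>");

      // The OutputStream must outlive the Writer, which only holds a raw pointer to it.
      std::unique_ptr<orc::OutputStream> outStream = orc::writeLocalFile("out.orc");
      orc::WriterOptions writerOptions;
      std::unique_ptr<orc::Writer> writer =
          orc::createWriter(*schema, outStream.get(), writerOptions);

      writer->close();  // assumed to flush pending data before outStream goes away
    }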
+ * @param fieldType the type of the new field + * @return a reference to the union type + */ + virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0; + + /** + * Build a Type object from string text representation. + */ + static ORC_UNIQUE_PTR<Type> buildTypeFromString(const std::string& input); + }; + + const int64_t DEFAULT_DECIMAL_SCALE = 18; + const int64_t DEFAULT_DECIMAL_PRECISION = 38; + + ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind); + ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind, + uint64_t maxLength); + ORC_UNIQUE_PTR<Type> + createDecimalType(uint64_t precision= + DEFAULT_DECIMAL_PRECISION, + uint64_t scale=DEFAULT_DECIMAL_SCALE); + + ORC_UNIQUE_PTR<Type> createStructType(); + ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements); + ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key, + ORC_UNIQUE_PTR<Type> value); + ORC_UNIQUE_PTR<Type> createUnionType(); + +} +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/Vector.hh b/contrib/libs/apache/orc/c++/include/orc/Vector.hh index 97bba1ef83..629c0b7f6b 100644 --- a/contrib/libs/apache/orc/c++/include/orc/Vector.hh +++ b/contrib/libs/apache/orc/c++/include/orc/Vector.hh @@ -1,326 +1,326 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_VECTOR_HH -#define ORC_VECTOR_HH - -#include "orc/orc-config.hh" -#include "MemoryPool.hh" -#include "Int128.hh" - -#include <list> -#include <memory> -#include <cstring> -#include <vector> -#include <stdexcept> -#include <cstdlib> -#include <iostream> - -namespace orc { - - /** - * The base class for each of the column vectors. This class handles - * the generic attributes such as number of elements, capacity, and - * notNull vector. - */ - struct ColumnVectorBatch { - ColumnVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~ColumnVectorBatch(); - - // the number of slots available - uint64_t capacity; - // the number of current occupied slots - uint64_t numElements; - // an array of capacity length marking non-null values - DataBuffer<char> notNull; - // whether there are any null values - bool hasNulls; - // whether the vector batch is encoded - bool isEncoded; - - // custom memory pool - MemoryPool& memoryPool; - - /** - * Generate a description of this vector as a string. - */ - virtual std::string toString() const = 0; - - /** - * Change the number of slots to at least the given capacity. - * This function is not recursive into subtypes. - */ - virtual void resize(uint64_t capacity); - - /** - * Empties the vector from all its elements, recursively. - * Do not alter the current capacity. - */ - virtual void clear(); - - /** - * Heap memory used by the batch. 
- */ - virtual uint64_t getMemoryUsage(); - - /** - * Check whether the batch length varies depending on data. - */ - virtual bool hasVariableLength(); - - private: - ColumnVectorBatch(const ColumnVectorBatch&); - ColumnVectorBatch& operator=(const ColumnVectorBatch&); - }; - - struct LongVectorBatch: public ColumnVectorBatch { - LongVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~LongVectorBatch(); - - DataBuffer<int64_t> data; - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - }; - - struct DoubleVectorBatch: public ColumnVectorBatch { - DoubleVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~DoubleVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - - DataBuffer<double> data; - }; - - struct StringVectorBatch: public ColumnVectorBatch { - StringVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~StringVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - - // pointers to the start of each string - DataBuffer<char*> data; - // the length of each string - DataBuffer<int64_t> length; - // string blob - DataBuffer<char> blob; - }; - - struct StringDictionary { - StringDictionary(MemoryPool& pool); - DataBuffer<char> dictionaryBlob; - - // Offset for each dictionary key entry. - DataBuffer<int64_t> dictionaryOffset; - - void getValueByIndex(int64_t index, char*& valPtr, int64_t& length) { - if (index < 0 || static_cast<uint64_t>(index) >= dictionaryOffset.size()) { - throw std::out_of_range("index out of range."); - } - - int64_t* offsetPtr = dictionaryOffset.data(); - - valPtr = dictionaryBlob.data() + offsetPtr[index]; - length = offsetPtr[index + 1] - offsetPtr[index]; - } - }; - - /** - * Include a index array with reference to corresponding dictionary. - * User first obtain index from index array and retrieve string pointer - * and length by calling getValueByIndex() from dictionary. - */ - struct EncodedStringVectorBatch : public StringVectorBatch { - EncodedStringVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~EncodedStringVectorBatch(); - std::string toString() const; - std::shared_ptr<StringDictionary> dictionary; - - // index for dictionary entry - DataBuffer<int64_t> index; - }; - - struct StructVectorBatch: public ColumnVectorBatch { - StructVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~StructVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - bool hasVariableLength(); - - std::vector<ColumnVectorBatch*> fields; - }; - - struct ListVectorBatch: public ColumnVectorBatch { - ListVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~ListVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - bool hasVariableLength(); - - /** - * The offset of the first element of each list. - * The length of list i is offsets[i+1] - offsets[i]. 
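The offsets convention stated just above (row i spans offsets[i] to offsets[i+1] in the child batch) is easiest to see with concrete numbers. A hedged sketch that fills a list<bigint> batch with the rows [1,2], [], [3,4,5]; the function name and values are illustrative, and it assumes the batch came from createRowBatch on a list<bigint> type so that elements is really a LongVectorBatch.

    #include "orc/Vector.hh"

    void fillThreeLists(orc::ListVectorBatch& batch) {
      auto& elems = dynamic_cast<orc::LongVectorBatch&>(*batch.elements);
      int64_t* offsets = batch.offsets.data();
      int64_t* values = elems.data.data();
      offsets[0] = 0;  // row 0: [1,2]   -> length 2 - 0 = 2
      offsets[1] = 2;  // row 1: []      -> length 2 - 2 = 0
      offsets[2] = 2;  // row 2: [3,4,5] -> length 5 - 2 = 3
      offsets[3] = 5;
      values[0] = 1; values[1] = 2; values[2] = 3; values[3] = 4; values[4] = 5;
      elems.numElements = 5;  // concatenated child values
      batch.numElements = 3;  // three list rows
    }
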
- */ - DataBuffer<int64_t> offsets; - - // the concatenated elements - ORC_UNIQUE_PTR<ColumnVectorBatch> elements; - }; - - struct MapVectorBatch: public ColumnVectorBatch { - MapVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~MapVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - bool hasVariableLength(); - - /** - * The offset of the first element of each map. - * The size of map i is offsets[i+1] - offsets[i]. - */ - DataBuffer<int64_t> offsets; - - // the concatenated keys - ORC_UNIQUE_PTR<ColumnVectorBatch> keys; - // the concatenated elements - ORC_UNIQUE_PTR<ColumnVectorBatch> elements; - }; - - struct UnionVectorBatch: public ColumnVectorBatch { - UnionVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~UnionVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - bool hasVariableLength(); - - /** - * For each value, which element of children has the value. - */ - DataBuffer<unsigned char> tags; - - /** - * For each value, the index inside of the child ColumnVectorBatch. - */ - DataBuffer<uint64_t> offsets; - - // the sub-columns - std::vector<ColumnVectorBatch*> children; - }; - - struct Decimal { - Decimal(const Int128& value, int32_t scale); - explicit Decimal(const std::string& value); - Decimal(); - - std::string toString() const; - Int128 value; - int32_t scale; - }; - - struct Decimal64VectorBatch: public ColumnVectorBatch { - Decimal64VectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~Decimal64VectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - - // total number of digits - int32_t precision; - // the number of places after the decimal - int32_t scale; - - // the numeric values - DataBuffer<int64_t> values; - - protected: - /** - * Contains the scales that were read from the file. Should NOT be - * used. - */ - DataBuffer<int64_t> readScales; - friend class Decimal64ColumnReader; - friend class Decimal64ColumnWriter; - }; - - struct Decimal128VectorBatch: public ColumnVectorBatch { - Decimal128VectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~Decimal128VectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - - // total number of digits - int32_t precision; - // the number of places after the decimal - int32_t scale; - - // the numeric values - DataBuffer<Int128> values; - - protected: - /** - * Contains the scales that were read from the file. Should NOT be - * used. - */ - DataBuffer<int64_t> readScales; - friend class Decimal128ColumnReader; - friend class DecimalHive11ColumnReader; - friend class Decimal128ColumnWriter; - }; - - /** - * A column vector batch for storing timestamp values. - * The timestamps are stored split into the time_t value (seconds since - * 1 Jan 1970 00:00:00) and the nanoseconds within the time_t value. - */ - struct TimestampVectorBatch: public ColumnVectorBatch { - TimestampVectorBatch(uint64_t capacity, MemoryPool& pool); - virtual ~TimestampVectorBatch(); - std::string toString() const; - void resize(uint64_t capacity); - void clear(); - uint64_t getMemoryUsage(); - - // the number of seconds past 1 Jan 1970 00:00 UTC (aka time_t) - // Note that we always assume data is in GMT timezone; therefore it is - // user's responsibility to convert wall clock time in local timezone - // to GMT. 
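The comment above fixes the storage convention for timestamps: whole seconds since the epoch in data, the remaining nanoseconds in nanoseconds, both interpreted as GMT. A minimal sketch of storing one post-1970 instant given as nanoseconds since the epoch; the helper name is hypothetical and null handling and pre-1970 values are ignored.

    // Illustrative only; assumes nanosSinceEpochUtc >= 0 and row < batch.capacity.
    void setTimestamp(orc::TimestampVectorBatch& batch, uint64_t row,
                      int64_t nanosSinceEpochUtc) {
      batch.data.data()[row] = nanosSinceEpochUtc / 1000000000;         // whole seconds
      batch.nanoseconds.data()[row] = nanosSinceEpochUtc % 1000000000;  // leftover nanos
    }
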
- DataBuffer<int64_t> data; - - // the nanoseconds of each value - DataBuffer<int64_t> nanoseconds; - }; - -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_VECTOR_HH +#define ORC_VECTOR_HH + +#include "orc/orc-config.hh" +#include "MemoryPool.hh" +#include "Int128.hh" + +#include <list> +#include <memory> +#include <cstring> +#include <vector> +#include <stdexcept> +#include <cstdlib> +#include <iostream> + +namespace orc { + + /** + * The base class for each of the column vectors. This class handles + * the generic attributes such as number of elements, capacity, and + * notNull vector. + */ + struct ColumnVectorBatch { + ColumnVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~ColumnVectorBatch(); + + // the number of slots available + uint64_t capacity; + // the number of current occupied slots + uint64_t numElements; + // an array of capacity length marking non-null values + DataBuffer<char> notNull; + // whether there are any null values + bool hasNulls; + // whether the vector batch is encoded + bool isEncoded; + + // custom memory pool + MemoryPool& memoryPool; + + /** + * Generate a description of this vector as a string. + */ + virtual std::string toString() const = 0; + + /** + * Change the number of slots to at least the given capacity. + * This function is not recursive into subtypes. + */ + virtual void resize(uint64_t capacity); + + /** + * Empties the vector from all its elements, recursively. + * Do not alter the current capacity. + */ + virtual void clear(); + + /** + * Heap memory used by the batch. + */ + virtual uint64_t getMemoryUsage(); + + /** + * Check whether the batch length varies depending on data. 
+ */ + virtual bool hasVariableLength(); + + private: + ColumnVectorBatch(const ColumnVectorBatch&); + ColumnVectorBatch& operator=(const ColumnVectorBatch&); + }; + + struct LongVectorBatch: public ColumnVectorBatch { + LongVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~LongVectorBatch(); + + DataBuffer<int64_t> data; + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + }; + + struct DoubleVectorBatch: public ColumnVectorBatch { + DoubleVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~DoubleVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + + DataBuffer<double> data; + }; + + struct StringVectorBatch: public ColumnVectorBatch { + StringVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~StringVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + + // pointers to the start of each string + DataBuffer<char*> data; + // the length of each string + DataBuffer<int64_t> length; + // string blob + DataBuffer<char> blob; + }; + + struct StringDictionary { + StringDictionary(MemoryPool& pool); + DataBuffer<char> dictionaryBlob; + + // Offset for each dictionary key entry. + DataBuffer<int64_t> dictionaryOffset; + + void getValueByIndex(int64_t index, char*& valPtr, int64_t& length) { + if (index < 0 || static_cast<uint64_t>(index) >= dictionaryOffset.size()) { + throw std::out_of_range("index out of range."); + } + + int64_t* offsetPtr = dictionaryOffset.data(); + + valPtr = dictionaryBlob.data() + offsetPtr[index]; + length = offsetPtr[index + 1] - offsetPtr[index]; + } + }; + + /** + * Include a index array with reference to corresponding dictionary. + * User first obtain index from index array and retrieve string pointer + * and length by calling getValueByIndex() from dictionary. + */ + struct EncodedStringVectorBatch : public StringVectorBatch { + EncodedStringVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~EncodedStringVectorBatch(); + std::string toString() const; + std::shared_ptr<StringDictionary> dictionary; + + // index for dictionary entry + DataBuffer<int64_t> index; + }; + + struct StructVectorBatch: public ColumnVectorBatch { + StructVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~StructVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + bool hasVariableLength(); + + std::vector<ColumnVectorBatch*> fields; + }; + + struct ListVectorBatch: public ColumnVectorBatch { + ListVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~ListVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + bool hasVariableLength(); + + /** + * The offset of the first element of each list. + * The length of list i is offsets[i+1] - offsets[i]. + */ + DataBuffer<int64_t> offsets; + + // the concatenated elements + ORC_UNIQUE_PTR<ColumnVectorBatch> elements; + }; + + struct MapVectorBatch: public ColumnVectorBatch { + MapVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~MapVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + bool hasVariableLength(); + + /** + * The offset of the first element of each map. + * The size of map i is offsets[i+1] - offsets[i]. 
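Alongside the offsets convention restated for maps, the StringDictionary and EncodedStringVectorBatch comments earlier in this hunk describe a two-step read: take the row's entry from index, then resolve it with getValueByIndex(). A hedged sketch of that pattern; the function name is hypothetical and it assumes the batch really is dictionary-encoded.

    #include "orc/Vector.hh"
    #include <string>
    #include <vector>

    // Null rows are skipped; non-null rows are copied out as std::string.
    void collectStrings(orc::EncodedStringVectorBatch& batch,
                        std::vector<std::string>& out) {
      const int64_t* idx = batch.index.data();
      for (uint64_t row = 0; row < batch.numElements; ++row) {
        if (batch.hasNulls && !batch.notNull.data()[row]) continue;
        char* value = nullptr;
        int64_t length = 0;
        batch.dictionary->getValueByIndex(idx[row], value, length);
        out.emplace_back(value, static_cast<size_t>(length));
      }
    }
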
+ */ + DataBuffer<int64_t> offsets; + + // the concatenated keys + ORC_UNIQUE_PTR<ColumnVectorBatch> keys; + // the concatenated elements + ORC_UNIQUE_PTR<ColumnVectorBatch> elements; + }; + + struct UnionVectorBatch: public ColumnVectorBatch { + UnionVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~UnionVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + bool hasVariableLength(); + + /** + * For each value, which element of children has the value. + */ + DataBuffer<unsigned char> tags; + + /** + * For each value, the index inside of the child ColumnVectorBatch. + */ + DataBuffer<uint64_t> offsets; + + // the sub-columns + std::vector<ColumnVectorBatch*> children; + }; + + struct Decimal { + Decimal(const Int128& value, int32_t scale); + explicit Decimal(const std::string& value); + Decimal(); + + std::string toString() const; + Int128 value; + int32_t scale; + }; + + struct Decimal64VectorBatch: public ColumnVectorBatch { + Decimal64VectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~Decimal64VectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + + // total number of digits + int32_t precision; + // the number of places after the decimal + int32_t scale; + + // the numeric values + DataBuffer<int64_t> values; + + protected: + /** + * Contains the scales that were read from the file. Should NOT be + * used. + */ + DataBuffer<int64_t> readScales; + friend class Decimal64ColumnReader; + friend class Decimal64ColumnWriter; + }; + + struct Decimal128VectorBatch: public ColumnVectorBatch { + Decimal128VectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~Decimal128VectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + + // total number of digits + int32_t precision; + // the number of places after the decimal + int32_t scale; + + // the numeric values + DataBuffer<Int128> values; + + protected: + /** + * Contains the scales that were read from the file. Should NOT be + * used. + */ + DataBuffer<int64_t> readScales; + friend class Decimal128ColumnReader; + friend class DecimalHive11ColumnReader; + friend class Decimal128ColumnWriter; + }; + + /** + * A column vector batch for storing timestamp values. + * The timestamps are stored split into the time_t value (seconds since + * 1 Jan 1970 00:00:00) and the nanoseconds within the time_t value. + */ + struct TimestampVectorBatch: public ColumnVectorBatch { + TimestampVectorBatch(uint64_t capacity, MemoryPool& pool); + virtual ~TimestampVectorBatch(); + std::string toString() const; + void resize(uint64_t capacity); + void clear(); + uint64_t getMemoryUsage(); + + // the number of seconds past 1 Jan 1970 00:00 UTC (aka time_t) + // Note that we always assume data is in GMT timezone; therefore it is + // user's responsibility to convert wall clock time in local timezone + // to GMT. + DataBuffer<int64_t> data; + + // the nanoseconds of each value + DataBuffer<int64_t> nanoseconds; + }; + +} + +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/Writer.hh b/contrib/libs/apache/orc/c++/include/orc/Writer.hh index 2588d62151..5b333861b1 100644 --- a/contrib/libs/apache/orc/c++/include/orc/Writer.hh +++ b/contrib/libs/apache/orc/c++/include/orc/Writer.hh @@ -1,252 +1,252 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_WRITER_HH -#define ORC_WRITER_HH - -#include "orc/Common.hh" -#include "orc/orc-config.hh" -#include "orc/Type.hh" -#include "orc/Vector.hh" - -#include <memory> -#include <set> -#include <string> -#include <vector> - -namespace orc { - - // classes that hold data members so we can maintain binary compatibility - struct WriterOptionsPrivate; - - enum CompressionStrategy { - CompressionStrategy_SPEED = 0, - CompressionStrategy_COMPRESSION - }; - - enum RleVersion { - RleVersion_1 = 0, - RleVersion_2 = 1 - }; - - class Timezone; - - /** - * Options for creating a Writer. - */ - class WriterOptions { - private: - ORC_UNIQUE_PTR<WriterOptionsPrivate> privateBits; - - public: - WriterOptions(); - WriterOptions(const WriterOptions&); - WriterOptions(WriterOptions&); - WriterOptions& operator=(const WriterOptions&); - virtual ~WriterOptions(); - - /** - * Set the strip size. - */ - WriterOptions& setStripeSize(uint64_t size); - - /** - * Get the strip size. - * @return if not set, return default value. - */ - uint64_t getStripeSize() const; - - /** - * Set the data compression block size. - */ - WriterOptions& setCompressionBlockSize(uint64_t size); - - /** - * Get the data compression block size. - * @return if not set, return default value. - */ - uint64_t getCompressionBlockSize() const; - - /** - * Set row index stride (the number of rows per an entry in the row index). Use value 0 to disable row index. - */ - WriterOptions& setRowIndexStride(uint64_t stride); - - /** - * Get the row index stride (the number of rows per an entry in the row index). - * @return if not set, return default value. - */ - uint64_t getRowIndexStride() const; - - /** - * Set the dictionary key size threshold. - * 0 to disable dictionary encoding. - * 1 to always enable dictionary encoding. - */ - WriterOptions& setDictionaryKeySizeThreshold(double val); - - /** - * Get the dictionary key size threshold. - */ - double getDictionaryKeySizeThreshold() const; - - /** - * Set Orc file version - */ - WriterOptions& setFileVersion(const FileVersion& version); - - /** - * Get Orc file version - */ - FileVersion getFileVersion() const; - - /** - * Set compression kind. - */ - WriterOptions& setCompression(CompressionKind comp); - - /** - * Get the compression kind. - * @return if not set, return default value which is ZLIB. - */ - CompressionKind getCompression() const; - - /** - * Set the compression strategy. - */ - WriterOptions& setCompressionStrategy(CompressionStrategy strategy); - - /** - * Get the compression strategy. - * @return if not set, return default value which is speed. - */ - CompressionStrategy getCompressionStrategy() const; - - /** - * Get if the bitpacking should be aligned. 
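Because every setter above returns WriterOptions&, a configuration chains as in this hedged sketch. The numeric values are illustrative, the helper name is hypothetical, and CompressionKind_ZLIB is assumed to come from orc/Common.hh rather than this header.

    #include "orc/Writer.hh"

    orc::WriterOptions makeOptions() {
      orc::WriterOptions options;
      options.setStripeSize(64 * 1024 * 1024)          // 64 MiB stripes
             .setCompressionBlockSize(64 * 1024)       // 64 KiB compression chunks
             .setRowIndexStride(10000)                 // 0 would disable the row index
             .setDictionaryKeySizeThreshold(0.8)       // dictionary only while it stays under 80%
             .setCompression(orc::CompressionKind_ZLIB)
             .setCompressionStrategy(orc::CompressionStrategy_SPEED);
      return options;
    }
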
- * @return true if should be aligned, return false otherwise - */ - bool getAlignedBitpacking() const; - - /** - * Set the padding tolerance. - */ - WriterOptions& setPaddingTolerance(double tolerance); - - /** - * Get the padding tolerance. - * @return if not set, return default value which is zero. - */ - double getPaddingTolerance() const; - - /** - * Set the memory pool. - */ - WriterOptions& setMemoryPool(MemoryPool * memoryPool); - - /** - * Get the memory pool. - * @return if not set, return default memory pool. - */ - MemoryPool * getMemoryPool() const; - - /** - * Set the error stream. - */ - WriterOptions& setErrorStream(std::ostream& errStream); - - /** - * Get the error stream. - * @return if not set, return std::err. - */ - std::ostream * getErrorStream() const; - - /** - * Get the RLE version. - */ - RleVersion getRleVersion() const; - - /** - * Get whether or not to write row group index - * @return if not set, the default is false - */ - bool getEnableIndex() const; - - /** - * Get whether or not to enable dictionary encoding - * @return if not set, the default is false - */ - bool getEnableDictionary() const; - - /** - * Set columns that use BloomFilter - */ - WriterOptions& setColumnsUseBloomFilter(const std::set<uint64_t>& columns); - - /** - * Get whether this column uses BloomFilter - */ - bool isColumnUseBloomFilter(uint64_t column) const; - - /** - * Set false positive probability of BloomFilter - */ - WriterOptions& setBloomFilterFPP(double fpp); - - /** - * Get false positive probability of BloomFilter - */ - double getBloomFilterFPP() const; - - /** - * Get version of BloomFilter - */ - BloomFilterVersion getBloomFilterVersion() const; - }; - - class Writer { - public: - virtual ~Writer(); - - /** - * Create a row batch for writing the columns into this file. - * @param size the number of rows to write. - * @return a new ColumnVectorBatch to write into. - */ - virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size - ) const = 0; - - /** - * Add a row batch into current writer. - * @param rowsToAdd the row batch data to write. - */ - virtual void add(ColumnVectorBatch& rowsToAdd) = 0; - - /** - * Close the writer and flush any pending data to the output stream. - */ - virtual void close() = 0; - - /** - * Add user metadata to the writer. - */ - virtual void addUserMetadata(const std::string name, const std::string value) = 0; - }; -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ORC_WRITER_HH +#define ORC_WRITER_HH + +#include "orc/Common.hh" +#include "orc/orc-config.hh" +#include "orc/Type.hh" +#include "orc/Vector.hh" + +#include <memory> +#include <set> +#include <string> +#include <vector> + +namespace orc { + + // classes that hold data members so we can maintain binary compatibility + struct WriterOptionsPrivate; + + enum CompressionStrategy { + CompressionStrategy_SPEED = 0, + CompressionStrategy_COMPRESSION + }; + + enum RleVersion { + RleVersion_1 = 0, + RleVersion_2 = 1 + }; + + class Timezone; + + /** + * Options for creating a Writer. + */ + class WriterOptions { + private: + ORC_UNIQUE_PTR<WriterOptionsPrivate> privateBits; + + public: + WriterOptions(); + WriterOptions(const WriterOptions&); + WriterOptions(WriterOptions&); + WriterOptions& operator=(const WriterOptions&); + virtual ~WriterOptions(); + + /** + * Set the strip size. + */ + WriterOptions& setStripeSize(uint64_t size); + + /** + * Get the strip size. + * @return if not set, return default value. + */ + uint64_t getStripeSize() const; + + /** + * Set the data compression block size. + */ + WriterOptions& setCompressionBlockSize(uint64_t size); + + /** + * Get the data compression block size. + * @return if not set, return default value. + */ + uint64_t getCompressionBlockSize() const; + + /** + * Set row index stride (the number of rows per an entry in the row index). Use value 0 to disable row index. + */ + WriterOptions& setRowIndexStride(uint64_t stride); + + /** + * Get the row index stride (the number of rows per an entry in the row index). + * @return if not set, return default value. + */ + uint64_t getRowIndexStride() const; + + /** + * Set the dictionary key size threshold. + * 0 to disable dictionary encoding. + * 1 to always enable dictionary encoding. + */ + WriterOptions& setDictionaryKeySizeThreshold(double val); + + /** + * Get the dictionary key size threshold. + */ + double getDictionaryKeySizeThreshold() const; + + /** + * Set Orc file version + */ + WriterOptions& setFileVersion(const FileVersion& version); + + /** + * Get Orc file version + */ + FileVersion getFileVersion() const; + + /** + * Set compression kind. + */ + WriterOptions& setCompression(CompressionKind comp); + + /** + * Get the compression kind. + * @return if not set, return default value which is ZLIB. + */ + CompressionKind getCompression() const; + + /** + * Set the compression strategy. + */ + WriterOptions& setCompressionStrategy(CompressionStrategy strategy); + + /** + * Get the compression strategy. + * @return if not set, return default value which is speed. + */ + CompressionStrategy getCompressionStrategy() const; + + /** + * Get if the bitpacking should be aligned. + * @return true if should be aligned, return false otherwise + */ + bool getAlignedBitpacking() const; + + /** + * Set the padding tolerance. + */ + WriterOptions& setPaddingTolerance(double tolerance); + + /** + * Get the padding tolerance. + * @return if not set, return default value which is zero. + */ + double getPaddingTolerance() const; + + /** + * Set the memory pool. + */ + WriterOptions& setMemoryPool(MemoryPool * memoryPool); + + /** + * Get the memory pool. + * @return if not set, return default memory pool. + */ + MemoryPool * getMemoryPool() const; + + /** + * Set the error stream. + */ + WriterOptions& setErrorStream(std::ostream& errStream); + + /** + * Get the error stream. + * @return if not set, return std::err. + */ + std::ostream * getErrorStream() const; + + /** + * Get the RLE version. 
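Putting the Writer interface above together with a schema gives the usual write loop, sketched here under stated assumptions: createWriter and writeLocalFile are declared in orc/OrcFile.hh, which is not part of this diff, and the schema, batch size, and values are illustrative.

    #include "orc/OrcFile.hh"   // assumed location of createWriter/writeLocalFile

    void writeOneColumn(const std::string& path) {
      std::unique_ptr<orc::Type> schema =
          orc::Type::buildTypeFromString("struct<x:bigint>");
      std::unique_ptr<orc::OutputStream> out = orc::writeLocalFile(path);
      orc::WriterOptions options;
      std::unique_ptr<orc::Writer> writer =
          orc::createWriter(*schema, out.get(), options);

      std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(1024);
      auto& root = dynamic_cast<orc::StructVectorBatch&>(*batch);
      auto& x = dynamic_cast<orc::LongVectorBatch&>(*root.fields[0]);
      for (uint64_t i = 0; i < 1024; ++i) {
        x.data.data()[i] = static_cast<int64_t>(i);
      }
      x.numElements = 1024;
      root.numElements = 1024;
      writer->add(*batch);   // may be called repeatedly with refilled batches
      writer->close();       // flushes pending stripes and the footer
    }
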
+ */ + RleVersion getRleVersion() const; + + /** + * Get whether or not to write row group index + * @return if not set, the default is false + */ + bool getEnableIndex() const; + + /** + * Get whether or not to enable dictionary encoding + * @return if not set, the default is false + */ + bool getEnableDictionary() const; + + /** + * Set columns that use BloomFilter + */ + WriterOptions& setColumnsUseBloomFilter(const std::set<uint64_t>& columns); + + /** + * Get whether this column uses BloomFilter + */ + bool isColumnUseBloomFilter(uint64_t column) const; + + /** + * Set false positive probability of BloomFilter + */ + WriterOptions& setBloomFilterFPP(double fpp); + + /** + * Get false positive probability of BloomFilter + */ + double getBloomFilterFPP() const; + + /** + * Get version of BloomFilter + */ + BloomFilterVersion getBloomFilterVersion() const; + }; + + class Writer { + public: + virtual ~Writer(); + + /** + * Create a row batch for writing the columns into this file. + * @param size the number of rows to write. + * @return a new ColumnVectorBatch to write into. + */ + virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size + ) const = 0; + + /** + * Add a row batch into current writer. + * @param rowsToAdd the row batch data to write. + */ + virtual void add(ColumnVectorBatch& rowsToAdd) = 0; + + /** + * Close the writer and flush any pending data to the output stream. + */ + virtual void close() = 0; + + /** + * Add user metadata to the writer. + */ + virtual void addUserMetadata(const std::string name, const std::string value) = 0; + }; +} + +#endif diff --git a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh index d06d892b41..18bbbd78e1 100644 --- a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh +++ b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh @@ -1,78 +1,78 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_CONFIG_HH -#define ORC_CONFIG_HH - +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ORC_CONFIG_HH +#define ORC_CONFIG_HH + #define ORC_VERSION "1.6.12" - -#define ORC_CXX_HAS_CSTDINT -#define ORC_CXX_HAS_INITIALIZER_LIST -#define ORC_CXX_HAS_NOEXCEPT -#define ORC_CXX_HAS_NULLPTR -#define ORC_CXX_HAS_OVERRIDE -#define ORC_CXX_HAS_UNIQUE_PTR - -#ifdef ORC_CXX_HAS_CSTDINT - #include <cstdint> -#else - #include <stdint.h> -#endif - -#ifdef ORC_CXX_HAS_NOEXCEPT - #define ORC_NOEXCEPT noexcept -#else - #define ORC_NOEXCEPT throw () -#endif - -#ifdef ORC_CXX_HAS_NULLPTR - #define ORC_NULLPTR nullptr -#else - namespace orc { - class nullptr_t { - public: - template<class T> - operator T*() const { - return 0; - } - - template<class C, class T> - operator T C::*() const { - return 0; - } - private: - void operator&() const; // whose address can't be taken - }; - const nullptr_t nullptr = {}; - } - #define ORC_NULLPTR orc::nullptr -#endif - -#ifdef ORC_CXX_HAS_OVERRIDE - #define ORC_OVERRIDE override -#else - #define ORC_OVERRIDE -#endif - -#ifdef ORC_CXX_HAS_UNIQUE_PTR - #define ORC_UNIQUE_PTR std::unique_ptr -#else - #define ORC_UNIQUE_PTR std::auto_ptr - namespace std { - template<typename T> - inline T move(T& x) { return x; } - } -#endif - -#endif + +#define ORC_CXX_HAS_CSTDINT +#define ORC_CXX_HAS_INITIALIZER_LIST +#define ORC_CXX_HAS_NOEXCEPT +#define ORC_CXX_HAS_NULLPTR +#define ORC_CXX_HAS_OVERRIDE +#define ORC_CXX_HAS_UNIQUE_PTR + +#ifdef ORC_CXX_HAS_CSTDINT + #include <cstdint> +#else + #include <stdint.h> +#endif + +#ifdef ORC_CXX_HAS_NOEXCEPT + #define ORC_NOEXCEPT noexcept +#else + #define ORC_NOEXCEPT throw () +#endif + +#ifdef ORC_CXX_HAS_NULLPTR + #define ORC_NULLPTR nullptr +#else + namespace orc { + class nullptr_t { + public: + template<class T> + operator T*() const { + return 0; + } + + template<class C, class T> + operator T C::*() const { + return 0; + } + private: + void operator&() const; // whose address can't be taken + }; + const nullptr_t nullptr = {}; + } + #define ORC_NULLPTR orc::nullptr +#endif + +#ifdef ORC_CXX_HAS_OVERRIDE + #define ORC_OVERRIDE override +#else + #define ORC_OVERRIDE +#endif + +#ifdef ORC_CXX_HAS_UNIQUE_PTR + #define ORC_UNIQUE_PTR std::unique_ptr +#else + #define ORC_UNIQUE_PTR std::auto_ptr + namespace std { + template<typename T> + inline T move(T& x) { return x; } + } +#endif + +#endif diff --git a/contrib/libs/apache/orc/c++/src/Adaptor.cc b/contrib/libs/apache/orc/c++/src/Adaptor.cc index f402d65adf..bf3a3e181b 100644 --- a/contrib/libs/apache/orc/c++/src/Adaptor.cc +++ b/contrib/libs/apache/orc/c++/src/Adaptor.cc @@ -1,88 +1,88 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include "Adaptor.hh" -#include <sstream> -#include <iomanip> - -#ifndef HAS_STOLL -namespace std { - int64_t std::stoll(std::string str) { - int64_t val = 0; - stringstream ss; - ss << str; - ss >> val; - return val; - } -} -#endif - -#ifndef HAS_STRPTIME -char* strptime(const char* s, const char* f, struct tm* tm) { - std::istringstream input(s); - input.imbue(std::locale(setlocale(LC_ALL, nullptr))); - input >> std::get_time(tm, f); - if (input.fail()) return nullptr; - return (char*)(s + input.tellg()); -} -#endif - -#ifndef HAS_PREAD - #ifdef _WIN32 -#include <Windows.h> -#include <io.h> -ssize_t pread(int fd, void* buf, size_t size, off_t offset) { - auto handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); - - OVERLAPPED ol; - memset(&ol, 0, sizeof(OVERLAPPED)); - ol.Offset = offset; - - DWORD rt; - if (!ReadFile(handle, buf, static_cast<DWORD>(size), &rt, &ol)) { - errno = GetLastError(); - return -1; - } - return static_cast<ssize_t>(rt); -} - #else - #error("pread() undefined: unknown environment") - #endif -#endif - -namespace orc { -#ifdef HAS_DOUBLE_TO_STRING - std::string to_string(double val) { - return std::to_string(val); - } -#else - std::string to_string(double val) { - return std::to_string(static_cast<long double>(val)); - } -#endif - -#ifdef HAS_INT64_TO_STRING - std::string to_string(int64_t val) { - return std::to_string(val); - } -#else - std::string to_string(int64_t val) { - return std::to_string(static_cast<long long int>(val)); - } -#endif -} +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "Adaptor.hh" +#include <sstream> +#include <iomanip> + +#ifndef HAS_STOLL +namespace std { + int64_t std::stoll(std::string str) { + int64_t val = 0; + stringstream ss; + ss << str; + ss >> val; + return val; + } +} +#endif + +#ifndef HAS_STRPTIME +char* strptime(const char* s, const char* f, struct tm* tm) { + std::istringstream input(s); + input.imbue(std::locale(setlocale(LC_ALL, nullptr))); + input >> std::get_time(tm, f); + if (input.fail()) return nullptr; + return (char*)(s + input.tellg()); +} +#endif + +#ifndef HAS_PREAD + #ifdef _WIN32 +#include <Windows.h> +#include <io.h> +ssize_t pread(int fd, void* buf, size_t size, off_t offset) { + auto handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + + OVERLAPPED ol; + memset(&ol, 0, sizeof(OVERLAPPED)); + ol.Offset = offset; + + DWORD rt; + if (!ReadFile(handle, buf, static_cast<DWORD>(size), &rt, &ol)) { + errno = GetLastError(); + return -1; + } + return static_cast<ssize_t>(rt); +} + #else + #error("pread() undefined: unknown environment") + #endif +#endif + +namespace orc { +#ifdef HAS_DOUBLE_TO_STRING + std::string to_string(double val) { + return std::to_string(val); + } +#else + std::string to_string(double val) { + return std::to_string(static_cast<long double>(val)); + } +#endif + +#ifdef HAS_INT64_TO_STRING + std::string to_string(int64_t val) { + return std::to_string(val); + } +#else + std::string to_string(int64_t val) { + return std::to_string(static_cast<long long int>(val)); + } +#endif +} diff --git a/contrib/libs/apache/orc/c++/src/Adaptor.hh b/contrib/libs/apache/orc/c++/src/Adaptor.hh index 2d6be71faa..a91b9c894d 100644 --- a/contrib/libs/apache/orc/c++/src/Adaptor.hh +++ b/contrib/libs/apache/orc/c++/src/Adaptor.hh @@ -1,175 +1,175 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ADAPTER_HH -#define ADAPTER_HH - -/* #undef INT64_IS_LL */ -#define HAS_CONSTEXPR +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ADAPTER_HH +#define ADAPTER_HH + +/* #undef INT64_IS_LL */ +#define HAS_CONSTEXPR #ifndef _MSC_VER -#define HAS_PREAD -#endif -#define HAS_STRPTIME -#define HAS_STOLL -#define HAS_DIAGNOSTIC_PUSH -#define HAS_DOUBLE_TO_STRING -#define HAS_INT64_TO_STRING -#define HAS_PRE_1970 +#define HAS_PREAD +#endif +#define HAS_STRPTIME +#define HAS_STOLL +#define HAS_DIAGNOSTIC_PUSH +#define HAS_DOUBLE_TO_STRING +#define HAS_INT64_TO_STRING +#define HAS_PRE_1970 #define HAS_POST_2038 -#define HAS_STD_ISNAN -#define HAS_STD_MUTEX +#define HAS_STD_ISNAN +#define HAS_STD_MUTEX #ifndef _MSC_VER #define HAS_BUILTIN_OVERFLOW_CHECK #endif -/* #undef NEEDS_REDUNDANT_MOVE */ -/* #undef NEEDS_Z_PREFIX */ - -#include "orc/orc-config.hh" -#include <string> - -#ifdef _MSC_VER -#include <BaseTsd.h> -typedef SSIZE_T ssize_t; -#define timegm(tm) _mkgmtime(tm) -#define gmtime_r(timep, result) (gmtime_s(result, timep) ? NULL : result) -#define asctime_r(tm, buf) (asctime_s(buf, 26, tm) ? NULL : buf) -#endif - -#ifndef HAS_STOLL - // A poor man's stoll that converts str to a long long int base 10 - namespace std { - int64_t stoll(std::string str); - } -#endif - -#ifndef HAS_STRPTIME - char* strptime(const char* buf, const char* format, struct tm* tm); -#endif - -#ifndef HAS_PREAD - ssize_t pread(int fd, void* buf, size_t count, off_t offset); -#endif - -#ifdef INT64_IS_LL - #define INT64_FORMAT_STRING "ll" -#else - #define INT64_FORMAT_STRING "l" -#endif - -#ifndef ORC_CXX_HAS_NOEXCEPT - #define noexcept ORC_NOEXCEPT -#endif - -#ifndef ORC_CXX_HAS_OVERRIDE - #define override ORC_OVERRIDE -#endif - -#ifdef HAS_DIAGNOSTIC_PUSH - #ifdef __clang__ - #define DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") - #define DIAGNOSTIC_POP _Pragma("clang diagnostic pop") - #elif defined(__GNUC__) - #define DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") - #define DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") - #elif defined(_MSC_VER) - #define DIAGNOSTIC_PUSH __pragma(warning(push)) - #define DIAGNOSTIC_POP __pragma(warning(pop)) - #else - #error("Unknown compiler") - #endif -#else - #define DIAGNOSTIC_PUSH - #define DIAGNOSTIC_POP -#endif - -#define PRAGMA(TXT) _Pragma(#TXT) - - #define DIAGNOSTIC_IGNORE(XXX) - -#ifndef ORC_CXX_HAS_UNIQUE_PTR - #define unique_ptr auto_ptr -#endif - -#ifndef UINT32_MAX - #define UINT32_MAX 0xffffffff -#endif - -#ifndef INT64_MAX - #define INT64_MAX 0x7fffffffffffffff -#endif - -#ifndef INT64_MIN - #define INT64_MIN (-0x7fffffffffffffff - 1) -#endif - -#define GTEST_LANG_CXX11 0 - -#ifdef NEEDS_REDUNDANT_MOVE - #define REDUNDANT_MOVE(XXX) std::move(XXX) -#else - #define REDUNDANT_MOVE(XXX) XXX -#endif - -#ifndef HAS_STD_ISNAN - #include <math.h> - #define std::isnan(XXX) isnan(XXX) -#else - #include <cmath> -#endif - -#ifndef HAS_STD_MUTEX - #include <pthread.h> - namespace orc { - /** - * Lock guard for pthread_mutex_t object using RAII - * The Lock is automatically release when exiting current scope. 
- */ - class LockORC { - public: - explicit LockORC(pthread_mutex_t& mutex) : mutex_ref_(mutex) { - pthread_mutex_lock(&mutex_ref_); - } - ~LockORC() { pthread_mutex_unlock(&mutex_ref_); } - private: - // no default constructor - LockORC(); - // prohibit copying - LockORC(const LockORC&); - LockORC& operator=(const LockORC&); - - pthread_mutex_t& mutex_ref_; - }; - } - #define std::mutex pthread_mutex_t - #define std::lock_guard<std::mutex> LockORC -#else - #include <mutex> -#endif - -#ifdef NEEDS_Z_PREFIX -#define Z_PREFIX 1 -#endif - -namespace orc { - std::string to_string(double val); - std::string to_string(int64_t val); -} - +/* #undef NEEDS_REDUNDANT_MOVE */ +/* #undef NEEDS_Z_PREFIX */ + +#include "orc/orc-config.hh" +#include <string> + +#ifdef _MSC_VER +#include <BaseTsd.h> +typedef SSIZE_T ssize_t; +#define timegm(tm) _mkgmtime(tm) +#define gmtime_r(timep, result) (gmtime_s(result, timep) ? NULL : result) +#define asctime_r(tm, buf) (asctime_s(buf, 26, tm) ? NULL : buf) +#endif + +#ifndef HAS_STOLL + // A poor man's stoll that converts str to a long long int base 10 + namespace std { + int64_t stoll(std::string str); + } +#endif + +#ifndef HAS_STRPTIME + char* strptime(const char* buf, const char* format, struct tm* tm); +#endif + +#ifndef HAS_PREAD + ssize_t pread(int fd, void* buf, size_t count, off_t offset); +#endif + +#ifdef INT64_IS_LL + #define INT64_FORMAT_STRING "ll" +#else + #define INT64_FORMAT_STRING "l" +#endif + +#ifndef ORC_CXX_HAS_NOEXCEPT + #define noexcept ORC_NOEXCEPT +#endif + +#ifndef ORC_CXX_HAS_OVERRIDE + #define override ORC_OVERRIDE +#endif + +#ifdef HAS_DIAGNOSTIC_PUSH + #ifdef __clang__ + #define DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define DIAGNOSTIC_POP _Pragma("clang diagnostic pop") + #elif defined(__GNUC__) + #define DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") + #elif defined(_MSC_VER) + #define DIAGNOSTIC_PUSH __pragma(warning(push)) + #define DIAGNOSTIC_POP __pragma(warning(pop)) + #else + #error("Unknown compiler") + #endif +#else + #define DIAGNOSTIC_PUSH + #define DIAGNOSTIC_POP +#endif + +#define PRAGMA(TXT) _Pragma(#TXT) + + #define DIAGNOSTIC_IGNORE(XXX) + +#ifndef ORC_CXX_HAS_UNIQUE_PTR + #define unique_ptr auto_ptr +#endif + +#ifndef UINT32_MAX + #define UINT32_MAX 0xffffffff +#endif + +#ifndef INT64_MAX + #define INT64_MAX 0x7fffffffffffffff +#endif + +#ifndef INT64_MIN + #define INT64_MIN (-0x7fffffffffffffff - 1) +#endif + +#define GTEST_LANG_CXX11 0 + +#ifdef NEEDS_REDUNDANT_MOVE + #define REDUNDANT_MOVE(XXX) std::move(XXX) +#else + #define REDUNDANT_MOVE(XXX) XXX +#endif + +#ifndef HAS_STD_ISNAN + #include <math.h> + #define std::isnan(XXX) isnan(XXX) +#else + #include <cmath> +#endif + +#ifndef HAS_STD_MUTEX + #include <pthread.h> + namespace orc { + /** + * Lock guard for pthread_mutex_t object using RAII + * The Lock is automatically release when exiting current scope. 
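A note on the fallback above: LockORC only exists in the branch compiled when std::mutex is unavailable, and its whole purpose is scope-bound unlocking. A minimal sketch, assuming a pthread build where that branch is taken; the function and mutex names are hypothetical.

    #include <pthread.h>

    static pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;

    void touchSharedState() {
      orc::LockORC guard(gMutex);   // pthread_mutex_lock in the constructor
      // ... mutate state shared across threads ...
    }                               // pthread_mutex_unlock in the destructor
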
+ */ + class LockORC { + public: + explicit LockORC(pthread_mutex_t& mutex) : mutex_ref_(mutex) { + pthread_mutex_lock(&mutex_ref_); + } + ~LockORC() { pthread_mutex_unlock(&mutex_ref_); } + private: + // no default constructor + LockORC(); + // prohibit copying + LockORC(const LockORC&); + LockORC& operator=(const LockORC&); + + pthread_mutex_t& mutex_ref_; + }; + } + #define std::mutex pthread_mutex_t + #define std::lock_guard<std::mutex> LockORC +#else + #include <mutex> +#endif + +#ifdef NEEDS_Z_PREFIX +#define Z_PREFIX 1 +#endif + +namespace orc { + std::string to_string(double val); + std::string to_string(int64_t val); +} + #ifdef HAS_BUILTIN_OVERFLOW_CHECK #define multiplyExact !__builtin_mul_overflow #define addExact !__builtin_add_overflow @@ -204,8 +204,8 @@ namespace orc { } #endif -#ifndef HAS_CONSTEXPR -#define constexpr const -#endif - -#endif /* ADAPTER_HH */ +#ifndef HAS_CONSTEXPR +#define constexpr const +#endif + +#endif /* ADAPTER_HH */ diff --git a/contrib/libs/apache/orc/c++/src/BloomFilter.cc b/contrib/libs/apache/orc/c++/src/BloomFilter.cc index 8ec0acda8c..8a1f1880e7 100644 --- a/contrib/libs/apache/orc/c++/src/BloomFilter.cc +++ b/contrib/libs/apache/orc/c++/src/BloomFilter.cc @@ -1,328 +1,328 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "BloomFilter.hh" -#include "Murmur3.hh" - -namespace orc { - - constexpr uint64_t BITS_OF_LONG = 64; - constexpr uint8_t SHIFT_6_BITS = 6; - constexpr uint8_t SHIFT_3_BITS = 3; - - static bool isLittleEndian() { - static union { uint32_t i; char c[4]; } num = { 0x01020304 }; - return num.c[0] == 4; - } - - /** - * Implementation of BitSet - */ - BitSet::BitSet(uint64_t numBits) { - mData.resize(static_cast<size_t>(ceil( - static_cast<double>(numBits) / BITS_OF_LONG)), 0); - } - - BitSet::BitSet(const uint64_t * bits, uint64_t numBits) { - // caller should make sure numBits is multiple of 64 - mData.resize(numBits >> SHIFT_6_BITS, 0); - memcpy(mData.data(), bits, numBits >> SHIFT_3_BITS); - } - - void BitSet::set(uint64_t index) { - mData[index >> SHIFT_6_BITS] |= (1ULL << (index % BITS_OF_LONG)); - } - - bool BitSet::get(uint64_t index) { - return (mData[index >> SHIFT_6_BITS] & (1ULL << (index % BITS_OF_LONG))) != 0; - } - - uint64_t BitSet::bitSize() { - return mData.size() << SHIFT_6_BITS; - } - - void BitSet::merge(const BitSet& other) { - if (mData.size() != other.mData.size()) { - std::stringstream ss; - ss << "BitSet must be of equal length (" - << mData.size() << " != " << other.mData.size() << ")"; - throw std::logic_error(ss.str()); - } - - for (size_t i = 0; i != mData.size(); i++) { - mData[i] |= other.mData[i]; - } - } - - void BitSet::clear() { - memset(mData.data(), 0, sizeof(uint64_t) * mData.size()); - } - - const uint64_t * BitSet::getData() const { - return mData.data(); - } - - bool BitSet::operator==(const BitSet& other) const { - return mData == other.mData; - } - - /** - * Helper functions - */ - void checkArgument(bool expression, const std::string& message) { - if (!expression) { - throw std::logic_error(message); - } - } - - int32_t optimalNumOfHashFunctions(uint64_t expectedEntries, uint64_t numBits) { - double n = static_cast<double>(expectedEntries); - return std::max<int32_t>(1, static_cast<int32_t>( - std::round(static_cast<double>(numBits) / n * std::log(2.0)))); - } - - int32_t optimalNumOfBits(uint64_t expectedEntries, double fpp) { - double n = static_cast<double>(expectedEntries); - return static_cast<int32_t>(-n * std::log(fpp) / (std::log(2.0) * std::log(2.0))); - } - - // We use the trick mentioned in "Less Hashing, Same Performance: - // Building a Better Bloom Filter" by Kirsch et.al. 
From abstract - // 'only two hash functions are necessary to effectively implement - // a Bloom filter without any loss in the asymptotic false positive - // probability' - // Lets split up 64-bit hashcode into two 32-bit hash codes and employ - // the technique mentioned in the above paper - inline uint64_t getBytesHash(const char * data, int64_t length) { - if (data == nullptr) { - return Murmur3::NULL_HASHCODE; - } - - return Murmur3::hash64(reinterpret_cast<const uint8_t *>(data), - static_cast<uint32_t>(length)); - } - - /** - * Implementation of BloomFilter - */ - BloomFilterImpl::BloomFilterImpl(uint64_t expectedEntries, double fpp) { - checkArgument(expectedEntries > 0, - "expectedEntries should be > 0"); - checkArgument(fpp > 0.0 && fpp < 1.0, - "False positive probability should be > 0.0 & < 1.0"); - - uint64_t nb = static_cast<uint64_t>(optimalNumOfBits(expectedEntries, fpp)); - // make 'mNumBits' multiple of 64 - mNumBits = nb + (BITS_OF_LONG - (nb % BITS_OF_LONG)); - mNumHashFunctions = optimalNumOfHashFunctions(expectedEntries, mNumBits); - mBitSet.reset(new BitSet(mNumBits)); - } - - void BloomFilterImpl::addBytes(const char * data, int64_t length) { - uint64_t hash64 = getBytesHash(data, length); +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
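As a worked example of the sizing helpers restored above, with illustrative numbers: BloomFilterImpl is an internal class declared in the in-tree src/BloomFilter.hh, so constructing it directly is shown only to make the arithmetic concrete.

    #include "BloomFilter.hh"   // in-tree header declaring BloomFilterImpl (internal API)

    void sizingExample() {
      // n = 10,000 expected entries, fpp = 0.01:
      //   optimalNumOfBits  = -n * ln(0.01) / ln(2)^2    ->  95,850 bits (truncated)
      //   rounded up to a multiple of 64                 ->  95,872 bits (about 11.7 KiB)
      //   optimalNumOfHashFunctions(10000, 95872)        ->  round(9.5872 * ln 2) = 7 probes
      orc::BloomFilterImpl bf(10000, 0.01);
      bf.addLong(42);
      bool maybe = bf.testLong(42);  // true for every inserted value; roughly 1% false positives otherwise
      (void) maybe;
    }
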
+ */ + +#include "BloomFilter.hh" +#include "Murmur3.hh" + +namespace orc { + + constexpr uint64_t BITS_OF_LONG = 64; + constexpr uint8_t SHIFT_6_BITS = 6; + constexpr uint8_t SHIFT_3_BITS = 3; + + static bool isLittleEndian() { + static union { uint32_t i; char c[4]; } num = { 0x01020304 }; + return num.c[0] == 4; + } + + /** + * Implementation of BitSet + */ + BitSet::BitSet(uint64_t numBits) { + mData.resize(static_cast<size_t>(ceil( + static_cast<double>(numBits) / BITS_OF_LONG)), 0); + } + + BitSet::BitSet(const uint64_t * bits, uint64_t numBits) { + // caller should make sure numBits is multiple of 64 + mData.resize(numBits >> SHIFT_6_BITS, 0); + memcpy(mData.data(), bits, numBits >> SHIFT_3_BITS); + } + + void BitSet::set(uint64_t index) { + mData[index >> SHIFT_6_BITS] |= (1ULL << (index % BITS_OF_LONG)); + } + + bool BitSet::get(uint64_t index) { + return (mData[index >> SHIFT_6_BITS] & (1ULL << (index % BITS_OF_LONG))) != 0; + } + + uint64_t BitSet::bitSize() { + return mData.size() << SHIFT_6_BITS; + } + + void BitSet::merge(const BitSet& other) { + if (mData.size() != other.mData.size()) { + std::stringstream ss; + ss << "BitSet must be of equal length (" + << mData.size() << " != " << other.mData.size() << ")"; + throw std::logic_error(ss.str()); + } + + for (size_t i = 0; i != mData.size(); i++) { + mData[i] |= other.mData[i]; + } + } + + void BitSet::clear() { + memset(mData.data(), 0, sizeof(uint64_t) * mData.size()); + } + + const uint64_t * BitSet::getData() const { + return mData.data(); + } + + bool BitSet::operator==(const BitSet& other) const { + return mData == other.mData; + } + + /** + * Helper functions + */ + void checkArgument(bool expression, const std::string& message) { + if (!expression) { + throw std::logic_error(message); + } + } + + int32_t optimalNumOfHashFunctions(uint64_t expectedEntries, uint64_t numBits) { + double n = static_cast<double>(expectedEntries); + return std::max<int32_t>(1, static_cast<int32_t>( + std::round(static_cast<double>(numBits) / n * std::log(2.0)))); + } + + int32_t optimalNumOfBits(uint64_t expectedEntries, double fpp) { + double n = static_cast<double>(expectedEntries); + return static_cast<int32_t>(-n * std::log(fpp) / (std::log(2.0) * std::log(2.0))); + } + + // We use the trick mentioned in "Less Hashing, Same Performance: + // Building a Better Bloom Filter" by Kirsch et.al. 
From abstract + // 'only two hash functions are necessary to effectively implement + // a Bloom filter without any loss in the asymptotic false positive + // probability' + // Lets split up 64-bit hashcode into two 32-bit hash codes and employ + // the technique mentioned in the above paper + inline uint64_t getBytesHash(const char * data, int64_t length) { + if (data == nullptr) { + return Murmur3::NULL_HASHCODE; + } + + return Murmur3::hash64(reinterpret_cast<const uint8_t *>(data), + static_cast<uint32_t>(length)); + } + + /** + * Implementation of BloomFilter + */ + BloomFilterImpl::BloomFilterImpl(uint64_t expectedEntries, double fpp) { + checkArgument(expectedEntries > 0, + "expectedEntries should be > 0"); + checkArgument(fpp > 0.0 && fpp < 1.0, + "False positive probability should be > 0.0 & < 1.0"); + + uint64_t nb = static_cast<uint64_t>(optimalNumOfBits(expectedEntries, fpp)); + // make 'mNumBits' multiple of 64 + mNumBits = nb + (BITS_OF_LONG - (nb % BITS_OF_LONG)); + mNumHashFunctions = optimalNumOfHashFunctions(expectedEntries, mNumBits); + mBitSet.reset(new BitSet(mNumBits)); + } + + void BloomFilterImpl::addBytes(const char * data, int64_t length) { + uint64_t hash64 = getBytesHash(data, length); addHash(static_cast<int64_t>(hash64)); - } - - void BloomFilterImpl::addLong(int64_t data) { + } + + void BloomFilterImpl::addLong(int64_t data) { addHash(getLongHash(data)); - } - - bool BloomFilterImpl::testBytes(const char * data, int64_t length) const { - uint64_t hash64 = getBytesHash(data, length); + } + + bool BloomFilterImpl::testBytes(const char * data, int64_t length) const { + uint64_t hash64 = getBytesHash(data, length); return testHash(static_cast<int64_t>(hash64)); - } - - bool BloomFilterImpl::testLong(int64_t data) const { + } + + bool BloomFilterImpl::testLong(int64_t data) const { return testHash(getLongHash(data)); - } - - uint64_t BloomFilterImpl::sizeInBytes() const { - return getBitSize() >> SHIFT_3_BITS; - } - - uint64_t BloomFilterImpl::getBitSize() const { - return mBitSet->bitSize(); - } - - int32_t BloomFilterImpl::getNumHashFunctions() const { - return mNumHashFunctions; - } - - DIAGNOSTIC_PUSH - -#if defined(__clang__) - DIAGNOSTIC_IGNORE("-Wundefined-reinterpret-cast") -#endif - -#if defined(__GNUC__) - DIAGNOSTIC_IGNORE("-Wstrict-aliasing") -#endif - - // caller should make sure input proto::BloomFilter is valid since - // no check will be performed in the following constructor - BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) { - mNumHashFunctions = static_cast<int32_t>(bloomFilter.numhashfunctions()); - - const std::string& bitsetStr = bloomFilter.utf8bitset(); - mNumBits = bitsetStr.size() << SHIFT_3_BITS; - checkArgument(mNumBits % BITS_OF_LONG == 0, "numBits should be multiple of 64!"); - - const uint64_t * bitset = reinterpret_cast<const uint64_t *>(bitsetStr.data()); - if (isLittleEndian()) { - mBitSet.reset(new BitSet(bitset, mNumBits)); - } else { - std::vector<uint64_t> longs(mNumBits >> SHIFT_6_BITS); - for (size_t i = 0; i != longs.size(); ++i) { - // convert little-endian to big-endian - const uint64_t src = bitset[i]; - uint64_t& dst = longs[i]; - for (size_t bit = 0; bit != 64; bit += 8) { - dst |= (((src & (0xFFu << bit)) >> bit) << (56 - bit)); - } - } - - mBitSet.reset(new BitSet(longs.data(), mNumBits)); - } - } - - void BloomFilterImpl::addDouble(double data) { - addLong(reinterpret_cast<int64_t&>(data)); - } - - bool BloomFilterImpl::testDouble(double data) const{ - return 
testLong(reinterpret_cast<int64_t&>(data)); - } - - DIAGNOSTIC_POP - + } + + uint64_t BloomFilterImpl::sizeInBytes() const { + return getBitSize() >> SHIFT_3_BITS; + } + + uint64_t BloomFilterImpl::getBitSize() const { + return mBitSet->bitSize(); + } + + int32_t BloomFilterImpl::getNumHashFunctions() const { + return mNumHashFunctions; + } + + DIAGNOSTIC_PUSH + +#if defined(__clang__) + DIAGNOSTIC_IGNORE("-Wundefined-reinterpret-cast") +#endif + +#if defined(__GNUC__) + DIAGNOSTIC_IGNORE("-Wstrict-aliasing") +#endif + + // caller should make sure input proto::BloomFilter is valid since + // no check will be performed in the following constructor + BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) { + mNumHashFunctions = static_cast<int32_t>(bloomFilter.numhashfunctions()); + + const std::string& bitsetStr = bloomFilter.utf8bitset(); + mNumBits = bitsetStr.size() << SHIFT_3_BITS; + checkArgument(mNumBits % BITS_OF_LONG == 0, "numBits should be multiple of 64!"); + + const uint64_t * bitset = reinterpret_cast<const uint64_t *>(bitsetStr.data()); + if (isLittleEndian()) { + mBitSet.reset(new BitSet(bitset, mNumBits)); + } else { + std::vector<uint64_t> longs(mNumBits >> SHIFT_6_BITS); + for (size_t i = 0; i != longs.size(); ++i) { + // convert little-endian to big-endian + const uint64_t src = bitset[i]; + uint64_t& dst = longs[i]; + for (size_t bit = 0; bit != 64; bit += 8) { + dst |= (((src & (0xFFu << bit)) >> bit) << (56 - bit)); + } + } + + mBitSet.reset(new BitSet(longs.data(), mNumBits)); + } + } + + void BloomFilterImpl::addDouble(double data) { + addLong(reinterpret_cast<int64_t&>(data)); + } + + bool BloomFilterImpl::testDouble(double data) const{ + return testLong(reinterpret_cast<int64_t&>(data)); + } + + DIAGNOSTIC_POP + void BloomFilterImpl::addHash(int64_t hash64) { - int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff); + int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff); // In Java codes, we use "hash64 >>> 32" which is an unsigned shift op. // So we cast hash64 to uint64_t here for an unsigned right shift. int32_t hash2 = static_cast<int32_t>(static_cast<uint64_t>(hash64) >> 32); - - for (int32_t i = 1; i <= mNumHashFunctions; ++i) { - int32_t combinedHash = hash1 + i * hash2; - // hashcode should be positive, flip all the bits if it's negative - if (combinedHash < 0) { - combinedHash = ~combinedHash; - } - uint64_t pos = static_cast<uint64_t>(combinedHash) % mNumBits; - mBitSet->set(pos); - } - } - + + for (int32_t i = 1; i <= mNumHashFunctions; ++i) { + int32_t combinedHash = hash1 + i * hash2; + // hashcode should be positive, flip all the bits if it's negative + if (combinedHash < 0) { + combinedHash = ~combinedHash; + } + uint64_t pos = static_cast<uint64_t>(combinedHash) % mNumBits; + mBitSet->set(pos); + } + } + bool BloomFilterImpl::testHash(int64_t hash64) const{ - int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff); + int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff); // In Java codes, we use "hash64 >>> 32" which is an unsigned shift op. // So we cast hash64 to uint64_t here for an unsigned right shift. 
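    // As in addHash() above, this implements the double-hashing scheme from
    // "Less Hashing, Same Performance" (Kirsch & Mitzenmacher): probe i uses
    // hash1 + i * hash2 (flipped if negative) modulo numBits, so all k bit
    // positions are derived from a single Murmur3 hash64 value.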
int32_t hash2 = static_cast<int32_t>(static_cast<uint64_t>(hash64) >> 32); - - for (int32_t i = 1; i <= mNumHashFunctions; ++i) { - int32_t combinedHash = hash1 + i * hash2; - // hashcode should be positive, flip all the bits if it's negative - if (combinedHash < 0) { - combinedHash = ~combinedHash; - } - uint64_t pos = static_cast<uint64_t>(combinedHash) % mNumBits; - if (!mBitSet->get(pos)) { - return false; - } - } - return true; - } - - void BloomFilterImpl::merge(const BloomFilterImpl& other) { - if (mNumBits != other.mNumBits || mNumHashFunctions != other.mNumHashFunctions) { - std::stringstream ss; - ss << "BloomFilters are not compatible for merging: " - << "this: numBits:" << mNumBits - << ",numHashFunctions:" << mNumHashFunctions - << ", that: numBits:" << other.mNumBits - << ",numHashFunctions:" << other.mNumHashFunctions; - throw std::logic_error(ss.str()); - } - - mBitSet->merge(*other.mBitSet); - } - - void BloomFilterImpl::reset() { - mBitSet->clear(); - } - - void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const { - bloomFilter.set_numhashfunctions(static_cast<uint32_t>(mNumHashFunctions)); - - // According to ORC standard, the encoding is a sequence of bytes with - // a little endian encoding in the utf8bitset field. - if (isLittleEndian()) { - // bytes are already organized in little endian; thus no conversion needed - const char * bitset = reinterpret_cast<const char *>(mBitSet->getData()); - bloomFilter.set_utf8bitset(bitset, sizeInBytes()); - } else { - std::vector<uint64_t> bitset(sizeInBytes() / sizeof(uint64_t), 0); - const uint64_t * longs = mBitSet->getData(); - for (size_t i = 0; i != bitset.size(); ++i) { - uint64_t& dst = bitset[i]; - const uint64_t src = longs[i]; - // convert big-endian to little-endian - for (size_t bit = 0; bit != 64; bit += 8) { - dst |= (((src & (0xFFu << bit)) >> bit) << (56 - bit)); - } - } - bloomFilter.set_utf8bitset(bitset.data(), sizeInBytes()); - } - } - - bool BloomFilterImpl::operator==(const BloomFilterImpl& other) const { - return mNumBits == other.mNumBits && - mNumHashFunctions == other.mNumHashFunctions && - *mBitSet == *other.mBitSet; - } - - BloomFilter::~BloomFilter() { - // PASS - } - - std::unique_ptr<BloomFilter> BloomFilterUTF8Utils::deserialize( - const proto::Stream_Kind& streamKind, - const proto::ColumnEncoding& encoding, - const proto::BloomFilter& bloomFilter) { - - std::unique_ptr<BloomFilter> ret(nullptr); - - // only BLOOM_FILTER_UTF8 is supported - if (streamKind != proto::Stream_Kind_BLOOM_FILTER_UTF8) { - return ret; - } - - // make sure we don't use unknown encodings or original timestamp encodings - if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) { - return ret; - } - - // make sure all required fields exist - if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) { - return ret; - } - - ret.reset(new BloomFilterImpl(bloomFilter)); - return ret; - } - -} + + for (int32_t i = 1; i <= mNumHashFunctions; ++i) { + int32_t combinedHash = hash1 + i * hash2; + // hashcode should be positive, flip all the bits if it's negative + if (combinedHash < 0) { + combinedHash = ~combinedHash; + } + uint64_t pos = static_cast<uint64_t>(combinedHash) % mNumBits; + if (!mBitSet->get(pos)) { + return false; + } + } + return true; + } + + void BloomFilterImpl::merge(const BloomFilterImpl& other) { + if (mNumBits != other.mNumBits || mNumHashFunctions != other.mNumHashFunctions) { + std::stringstream ss; + ss << "BloomFilters are not compatible for merging: " + << "this: 
numBits:" << mNumBits + << ",numHashFunctions:" << mNumHashFunctions + << ", that: numBits:" << other.mNumBits + << ",numHashFunctions:" << other.mNumHashFunctions; + throw std::logic_error(ss.str()); + } + + mBitSet->merge(*other.mBitSet); + } + + void BloomFilterImpl::reset() { + mBitSet->clear(); + } + + void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const { + bloomFilter.set_numhashfunctions(static_cast<uint32_t>(mNumHashFunctions)); + + // According to ORC standard, the encoding is a sequence of bytes with + // a little endian encoding in the utf8bitset field. + if (isLittleEndian()) { + // bytes are already organized in little endian; thus no conversion needed + const char * bitset = reinterpret_cast<const char *>(mBitSet->getData()); + bloomFilter.set_utf8bitset(bitset, sizeInBytes()); + } else { + std::vector<uint64_t> bitset(sizeInBytes() / sizeof(uint64_t), 0); + const uint64_t * longs = mBitSet->getData(); + for (size_t i = 0; i != bitset.size(); ++i) { + uint64_t& dst = bitset[i]; + const uint64_t src = longs[i]; + // convert big-endian to little-endian + for (size_t bit = 0; bit != 64; bit += 8) { + dst |= (((src & (0xFFu << bit)) >> bit) << (56 - bit)); + } + } + bloomFilter.set_utf8bitset(bitset.data(), sizeInBytes()); + } + } + + bool BloomFilterImpl::operator==(const BloomFilterImpl& other) const { + return mNumBits == other.mNumBits && + mNumHashFunctions == other.mNumHashFunctions && + *mBitSet == *other.mBitSet; + } + + BloomFilter::~BloomFilter() { + // PASS + } + + std::unique_ptr<BloomFilter> BloomFilterUTF8Utils::deserialize( + const proto::Stream_Kind& streamKind, + const proto::ColumnEncoding& encoding, + const proto::BloomFilter& bloomFilter) { + + std::unique_ptr<BloomFilter> ret(nullptr); + + // only BLOOM_FILTER_UTF8 is supported + if (streamKind != proto::Stream_Kind_BLOOM_FILTER_UTF8) { + return ret; + } + + // make sure we don't use unknown encodings or original timestamp encodings + if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) { + return ret; + } + + // make sure all required fields exist + if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) { + return ret; + } + + ret.reset(new BloomFilterImpl(bloomFilter)); + return ret; + } + +} diff --git a/contrib/libs/apache/orc/c++/src/BloomFilter.hh b/contrib/libs/apache/orc/c++/src/BloomFilter.hh index ab2006bdae..cf18a46fd9 100644 --- a/contrib/libs/apache/orc/c++/src/BloomFilter.hh +++ b/contrib/libs/apache/orc/c++/src/BloomFilter.hh @@ -1,197 +1,197 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_BLOOMFILTER_IMPL_HH -#define ORC_BLOOMFILTER_IMPL_HH - -#include "orc/BloomFilter.hh" -#include "wrap/orc-proto-wrapper.hh" - -#include <cmath> -#include <sstream> -#include <vector> - -namespace orc { - - /** - * Bare metal bit set implementation. For performance reasons, this implementation does not check - * for index bounds nor expand the bit set size if the specified index is greater than the size. - */ - class BitSet { - public: - /** - * Creates an empty BitSet - * - * @param numBits - number of bits used - */ - BitSet(uint64_t numBits); - - /** - * Creates BitSet from serialized uint64_t buffer - * - * @param bits - serialized uint64_t buffer of bitset - * @param numBits - number of bits used - */ - BitSet(const uint64_t * bits, uint64_t numBits); - - /** - * Sets the bit at specified index. - * - * @param index - position - */ - void set(uint64_t index); - - /** - * Returns true if the bit is set in the specified index. - * - * @param index - position - * @return - value at the bit position - */ - bool get(uint64_t index); - - /** - * Number of bits - */ - uint64_t bitSize(); - - /** - * Combines the two BitSets using bitwise OR. - */ - void merge(const BitSet& other); - - /** - * Clears the bit set. - */ - void clear(); - - /** - * Gets underlying raw data - */ - const uint64_t * getData() const; - - /** - * Compares two BitSets - */ - bool operator==(const BitSet& other) const; - - private: - std::vector<uint64_t> mData; - }; - - /** - * BloomFilter is a probabilistic data structure for set membership check. - * BloomFilters are highly space efficient when compared to using a HashSet. - * Because of the probabilistic nature of bloom filter false positive (element - * not present in bloom filter but test() says true) are possible but false - * negatives are not possible (if element is present then test() will never - * say false). The false positive probability is configurable (default: 5%) - * depending on which storage requirement may increase or decrease. Lower the - * false positive probability greater is the space requirement. - * - * Bloom filters are sensitive to number of elements that will be inserted in - * the bloom filter. During the creation of bloom filter expected number of - * entries must be specified. If the number of insertions exceed the specified - * initial number of entries then false positive probability will increase - * accordingly. - * - * Internally, this implementation of bloom filter uses Murmur3 fast - * non-cryptographic hash algorithm. Although Murmur2 is slightly faster than - * Murmur3 in Java, it suffers from hash collisions for specific sequence of - * repeating bytes. Check the following link for more info - * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw - * - * Note that this class is here for backwards compatibility, because it uses - * the JVM default character set for strings. All new users should - * BloomFilterUtf8, which always uses UTF8 for the encoding. 
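   *
   * A minimal usage sketch (illustrative values, relying only on the API
   * declared below in this header):
   *
   *   BloomFilterImpl bf(1024, 0.05);   // ~1024 expected entries, 5% fpp
   *   bf.addLong(42);
   *   bf.addBytes("orc", 3);
   *   bool hit  = bf.testLong(42);      // true: no false negatives
   *   bool miss = bf.testLong(7);       // usually false; false positives
   *                                     // occur at roughly the fpp rate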
- */ - class BloomFilterImpl : public BloomFilter { - public: - /** - * Creates an empty BloomFilter - * - * @param expectedEntries - number of entries it will hold - * @param fpp - false positive probability - */ - BloomFilterImpl(uint64_t expectedEntries, double fpp=DEFAULT_FPP); - - /** - * Creates a BloomFilter by deserializing the proto-buf version - * - * caller should make sure input proto::BloomFilter is valid - */ - BloomFilterImpl(const proto::BloomFilter& bloomFilter); - - /** - * Adds a new element to the BloomFilter - */ - void addBytes(const char * data, int64_t length); - void addLong(int64_t data); - void addDouble(double data); - - /** - * Test if the element exists in BloomFilter - */ - bool testBytes(const char * data, int64_t length) const override; - bool testLong(int64_t data) const override; - bool testDouble(double data) const override; - - uint64_t sizeInBytes() const; - uint64_t getBitSize() const; - int32_t getNumHashFunctions() const; - - void merge(const BloomFilterImpl& other); - - void reset(); - - bool operator==(const BloomFilterImpl& other) const; - - private: - friend struct BloomFilterUTF8Utils; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_BLOOMFILTER_IMPL_HH +#define ORC_BLOOMFILTER_IMPL_HH + +#include "orc/BloomFilter.hh" +#include "wrap/orc-proto-wrapper.hh" + +#include <cmath> +#include <sstream> +#include <vector> + +namespace orc { + + /** + * Bare metal bit set implementation. For performance reasons, this implementation does not check + * for index bounds nor expand the bit set size if the specified index is greater than the size. + */ + class BitSet { + public: + /** + * Creates an empty BitSet + * + * @param numBits - number of bits used + */ + BitSet(uint64_t numBits); + + /** + * Creates BitSet from serialized uint64_t buffer + * + * @param bits - serialized uint64_t buffer of bitset + * @param numBits - number of bits used + */ + BitSet(const uint64_t * bits, uint64_t numBits); + + /** + * Sets the bit at specified index. + * + * @param index - position + */ + void set(uint64_t index); + + /** + * Returns true if the bit is set in the specified index. + * + * @param index - position + * @return - value at the bit position + */ + bool get(uint64_t index); + + /** + * Number of bits + */ + uint64_t bitSize(); + + /** + * Combines the two BitSets using bitwise OR. + */ + void merge(const BitSet& other); + + /** + * Clears the bit set. + */ + void clear(); + + /** + * Gets underlying raw data + */ + const uint64_t * getData() const; + + /** + * Compares two BitSets + */ + bool operator==(const BitSet& other) const; + + private: + std::vector<uint64_t> mData; + }; + + /** + * BloomFilter is a probabilistic data structure for set membership check. 
+ * BloomFilters are highly space efficient when compared to using a HashSet. + * Because of the probabilistic nature of bloom filter false positive (element + * not present in bloom filter but test() says true) are possible but false + * negatives are not possible (if element is present then test() will never + * say false). The false positive probability is configurable (default: 5%) + * depending on which storage requirement may increase or decrease. Lower the + * false positive probability greater is the space requirement. + * + * Bloom filters are sensitive to number of elements that will be inserted in + * the bloom filter. During the creation of bloom filter expected number of + * entries must be specified. If the number of insertions exceed the specified + * initial number of entries then false positive probability will increase + * accordingly. + * + * Internally, this implementation of bloom filter uses Murmur3 fast + * non-cryptographic hash algorithm. Although Murmur2 is slightly faster than + * Murmur3 in Java, it suffers from hash collisions for specific sequence of + * repeating bytes. Check the following link for more info + * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw + * + * Note that this class is here for backwards compatibility, because it uses + * the JVM default character set for strings. All new users should + * BloomFilterUtf8, which always uses UTF8 for the encoding. + */ + class BloomFilterImpl : public BloomFilter { + public: + /** + * Creates an empty BloomFilter + * + * @param expectedEntries - number of entries it will hold + * @param fpp - false positive probability + */ + BloomFilterImpl(uint64_t expectedEntries, double fpp=DEFAULT_FPP); + + /** + * Creates a BloomFilter by deserializing the proto-buf version + * + * caller should make sure input proto::BloomFilter is valid + */ + BloomFilterImpl(const proto::BloomFilter& bloomFilter); + + /** + * Adds a new element to the BloomFilter + */ + void addBytes(const char * data, int64_t length); + void addLong(int64_t data); + void addDouble(double data); + + /** + * Test if the element exists in BloomFilter + */ + bool testBytes(const char * data, int64_t length) const override; + bool testLong(int64_t data) const override; + bool testDouble(double data) const override; + + uint64_t sizeInBytes() const; + uint64_t getBitSize() const; + int32_t getNumHashFunctions() const; + + void merge(const BloomFilterImpl& other); + + void reset(); + + bool operator==(const BloomFilterImpl& other) const; + + private: + friend struct BloomFilterUTF8Utils; friend class TestBloomFilter_testBloomFilterBasicOperations_Test; - - // compute k hash values from hash64 and set bits + + // compute k hash values from hash64 and set bits void addHash(int64_t hash64); - - // compute k hash values from hash64 and check bits + + // compute k hash values from hash64 and check bits bool testHash(int64_t hash64) const; - - void serialize(proto::BloomFilter& bloomFilter) const; - - private: - static constexpr double DEFAULT_FPP = 0.05; - uint64_t mNumBits; - int32_t mNumHashFunctions; - std::unique_ptr<BitSet> mBitSet; - }; - - struct BloomFilterUTF8Utils { - // serialize BloomFilter in protobuf - static void serialize(const BloomFilterImpl& in, proto::BloomFilter& out) { - in.serialize(out); - } - - // deserialize BloomFilter from protobuf - static std::unique_ptr<BloomFilter> - deserialize(const proto::Stream_Kind& streamKind, - const proto::ColumnEncoding& columnEncoding, - const proto::BloomFilter& bloomFilter); - }; - + + void 
serialize(proto::BloomFilter& bloomFilter) const; + + private: + static constexpr double DEFAULT_FPP = 0.05; + uint64_t mNumBits; + int32_t mNumHashFunctions; + std::unique_ptr<BitSet> mBitSet; + }; + + struct BloomFilterUTF8Utils { + // serialize BloomFilter in protobuf + static void serialize(const BloomFilterImpl& in, proto::BloomFilter& out) { + in.serialize(out); + } + + // deserialize BloomFilter from protobuf + static std::unique_ptr<BloomFilter> + deserialize(const proto::Stream_Kind& streamKind, + const proto::ColumnEncoding& columnEncoding, + const proto::BloomFilter& bloomFilter); + }; + // Thomas Wang's integer hash function // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm // Put this in header file so tests can use it as well. @@ -205,6 +205,6 @@ namespace orc { key = key + (key << 31); return key; } -} - -#endif //ORC_BLOOMFILTER_IMPL_HH +} + +#endif //ORC_BLOOMFILTER_IMPL_HH diff --git a/contrib/libs/apache/orc/c++/src/ByteRLE.cc b/contrib/libs/apache/orc/c++/src/ByteRLE.cc index 30f5148b7c..ee1a4575dc 100644 --- a/contrib/libs/apache/orc/c++/src/ByteRLE.cc +++ b/contrib/libs/apache/orc/c++/src/ByteRLE.cc @@ -1,626 +1,626 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <algorithm> -#include <iostream> -#include <string.h> -#include <utility> - -#include "ByteRLE.hh" -#include "orc/Exceptions.hh" - -namespace orc { - - const int MINIMUM_REPEAT = 3; - const int MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT; - const int MAX_LITERAL_SIZE = 128; - - ByteRleEncoder::~ByteRleEncoder() { - // PASS - } - - class ByteRleEncoderImpl : public ByteRleEncoder { - public: - ByteRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output); - virtual ~ByteRleEncoderImpl() override; - - /** - * Encode the next batch of values. - * @param data to be encoded - * @param numValues the number of values to be encoded - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void add(const char* data, uint64_t numValues, - const char* notNull) override; - - /** - * Get size of buffer used so far. - */ - virtual uint64_t getBufferSize() const override; - - /** - * Flush underlying BufferedOutputStream. 
- */ - virtual uint64_t flush() override; - - virtual void recordPosition(PositionRecorder* recorder) const override; - - protected: - std::unique_ptr<BufferedOutputStream> outputStream; - char* literals; - int numLiterals; - bool repeat; - int tailRunLength; - int bufferPosition; - int bufferLength; - char* buffer; - - void writeByte(char c); - void writeValues(); - void write(char c); - }; - - ByteRleEncoderImpl::ByteRleEncoderImpl( - std::unique_ptr<BufferedOutputStream> output) - : outputStream(std::move(output)) { - literals = new char[MAX_LITERAL_SIZE]; - numLiterals = 0; - tailRunLength = 0; - repeat = false; - bufferPosition = 0; - bufferLength = 0; - buffer = nullptr; - } - - ByteRleEncoderImpl::~ByteRleEncoderImpl() { - // PASS - delete [] literals; - } - - void ByteRleEncoderImpl::writeByte(char c) { - if (bufferPosition == bufferLength) { - int addedSize = 0; - if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) { - throw std::bad_alloc(); - } - bufferPosition = 0; - bufferLength = addedSize; - } - buffer[bufferPosition++] = c; - } - - void ByteRleEncoderImpl::add( - const char* data, - uint64_t numValues, - const char* notNull) { - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - write(data[i]); - } - } - } - - void ByteRleEncoderImpl::writeValues() { - if (numLiterals != 0) { - if (repeat) { - writeByte( - static_cast<char>(numLiterals - static_cast<int>(MINIMUM_REPEAT))); - writeByte(literals[0]); - } else { - writeByte(static_cast<char>(-numLiterals)); - for (int i = 0; i < numLiterals; ++i) { - writeByte(literals[i]); - } - } - repeat = false; - tailRunLength = 0; - numLiterals = 0; - } - } - - uint64_t ByteRleEncoderImpl::flush() { - writeValues(); - outputStream->BackUp(bufferLength - bufferPosition); - uint64_t dataSize = outputStream->flush(); - bufferLength = bufferPosition = 0; - return dataSize; - } - - void ByteRleEncoderImpl::write(char value) { - if (numLiterals == 0) { - literals[numLiterals++] = value; - tailRunLength = 1; - } else if (repeat) { - if (value == literals[0]) { - numLiterals += 1; - if (numLiterals == MAXIMUM_REPEAT) { - writeValues(); - } - } else { - writeValues(); - literals[numLiterals++] = value; - tailRunLength = 1; - } - } else { - if (value == literals[numLiterals - 1]) { - tailRunLength += 1; - } else { - tailRunLength = 1; - } - if (tailRunLength == MINIMUM_REPEAT) { - if (numLiterals + 1 == MINIMUM_REPEAT) { - repeat = true; - numLiterals += 1; - } else { - numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1); - writeValues(); - literals[0] = value; - repeat = true; - numLiterals = MINIMUM_REPEAT; - } - } else { - literals[numLiterals++] = value; - if (numLiterals == MAX_LITERAL_SIZE) { - writeValues(); - } - } - } - } - - uint64_t ByteRleEncoderImpl::getBufferSize() const { - return outputStream->getSize(); - } - - void ByteRleEncoderImpl::recordPosition(PositionRecorder *recorder) const { - uint64_t flushedSize = outputStream->getSize(); - uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition); - if (outputStream->isCompressed()) { - // start of the compression chunk in the stream - recorder->add(flushedSize); - // number of decompressed bytes that need to be consumed - recorder->add(unflushedSize); - } else { - flushedSize -= static_cast<uint64_t>(bufferLength); - // byte offset of the RLE run’s start location - recorder->add(flushedSize + unflushedSize); - } - recorder->add(static_cast<uint64_t>(numLiterals)); - } - - std::unique_ptr<ByteRleEncoder> createByteRleEncoder - 
(std::unique_ptr<BufferedOutputStream> output) { - return std::unique_ptr<ByteRleEncoder>(new ByteRleEncoderImpl - (std::move(output))); - } - - class BooleanRleEncoderImpl : public ByteRleEncoderImpl { - public: - BooleanRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output); - virtual ~BooleanRleEncoderImpl() override; - - /** - * Encode the next batch of values - * @param data to be encoded - * @param numValues the number of values to be encoded - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void add(const char* data, uint64_t numValues, - const char* notNull) override; - - /** - * Flushing underlying BufferedOutputStream - */ - virtual uint64_t flush() override; - - virtual void recordPosition(PositionRecorder* recorder) const override; - - private: - int bitsRemained; - char current; - - }; - - BooleanRleEncoderImpl::BooleanRleEncoderImpl( - std::unique_ptr<BufferedOutputStream> output) - : ByteRleEncoderImpl(std::move(output)) { - bitsRemained = 8; - current = static_cast<char>(0); - } - - BooleanRleEncoderImpl::~BooleanRleEncoderImpl() { - // PASS - } - - void BooleanRleEncoderImpl::add( - const char* data, - uint64_t numValues, - const char* notNull) { - for (uint64_t i = 0; i < numValues; ++i) { - if (bitsRemained == 0) { - write(current); - current = static_cast<char>(0); - bitsRemained = 8; - } - if (!notNull || notNull[i]) { - if (!data || data[i]) { - current = - static_cast<char>(current | (0x80 >> (8 - bitsRemained))); - } - --bitsRemained; - } - } - if (bitsRemained == 0) { - write(current); - current = static_cast<char>(0); - bitsRemained = 8; - } - } - - uint64_t BooleanRleEncoderImpl::flush() { - if (bitsRemained != 8) { - write(current); - } - bitsRemained = 8; - current = static_cast<char>(0); - return ByteRleEncoderImpl::flush(); - } - - void BooleanRleEncoderImpl::recordPosition(PositionRecorder* recorder) const { - ByteRleEncoderImpl::recordPosition(recorder); - recorder->add(static_cast<uint64_t>(8 - bitsRemained)); - } - - std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder - (std::unique_ptr<BufferedOutputStream> output) { - BooleanRleEncoderImpl* encoder = - new BooleanRleEncoderImpl(std::move(output)) ; - return std::unique_ptr<ByteRleEncoder>( - reinterpret_cast<ByteRleEncoder*>(encoder)); - } - - ByteRleDecoder::~ByteRleDecoder() { - // PASS - } - - class ByteRleDecoderImpl: public ByteRleDecoder { - public: - ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); - - virtual ~ByteRleDecoderImpl(); - - /** - * Seek to a particular spot. - */ - virtual void seek(PositionProvider&); - - /** - * Seek over a given number of values. - */ - virtual void skip(uint64_t numValues); - - /** - * Read a number of values into the batch. 
- */ - virtual void next(char* data, uint64_t numValues, char* notNull); - - protected: - inline void nextBuffer(); - inline signed char readByte(); - inline void readHeader(); - - std::unique_ptr<SeekableInputStream> inputStream; - size_t remainingValues; - char value; - const char* bufferStart; - const char* bufferEnd; - bool repeating; - }; - - void ByteRleDecoderImpl::nextBuffer() { - int bufferLength; - const void* bufferPointer; - bool result = inputStream->Next(&bufferPointer, &bufferLength); - if (!result) { - throw ParseError("bad read in nextBuffer"); - } - bufferStart = static_cast<const char*>(bufferPointer); - bufferEnd = bufferStart + bufferLength; - } - - signed char ByteRleDecoderImpl::readByte() { - if (bufferStart == bufferEnd) { - nextBuffer(); - } - return *(bufferStart++); - } - - void ByteRleDecoderImpl::readHeader() { - signed char ch = readByte(); - if (ch < 0) { - remainingValues = static_cast<size_t>(-ch); - repeating = false; - } else { - remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT; - repeating = true; - value = readByte(); - } - } - - ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> - input) { - inputStream = std::move(input); - repeating = false; - remainingValues = 0; - value = 0; - bufferStart = nullptr; - bufferEnd = nullptr; - } - - ByteRleDecoderImpl::~ByteRleDecoderImpl() { - // PASS - } - - void ByteRleDecoderImpl::seek(PositionProvider& location) { - // move the input stream - inputStream->seek(location); - // force a re-read from the stream - bufferEnd = bufferStart; - // read a new header - readHeader(); - // skip ahead the given number of records - ByteRleDecoderImpl::skip(location.next()); - } - - void ByteRleDecoderImpl::skip(uint64_t numValues) { - while (numValues > 0) { - if (remainingValues == 0) { - readHeader(); - } - size_t count = std::min(static_cast<size_t>(numValues), remainingValues); - remainingValues -= count; - numValues -= count; - // for literals we need to skip over count bytes, which may involve - // reading from the underlying stream - if (!repeating) { - size_t consumedBytes = count; - while (consumedBytes > 0) { - if (bufferStart == bufferEnd) { - nextBuffer(); - } - size_t skipSize = std::min(static_cast<size_t>(consumedBytes), - static_cast<size_t>(bufferEnd - - bufferStart)); - bufferStart += skipSize; - consumedBytes -= skipSize; - } - } - } - } - - void ByteRleDecoderImpl::next(char* data, uint64_t numValues, - char* notNull) { - uint64_t position = 0; - // skip over null values - while (notNull && position < numValues && !notNull[position]) { - position += 1; - } - while (position < numValues) { - // if we are out of values, read more - if (remainingValues == 0) { - readHeader(); - } - // how many do we read out of this block? 
- size_t count = std::min(static_cast<size_t>(numValues - position), - remainingValues); - uint64_t consumed = 0; - if (repeating) { - if (notNull) { - for(uint64_t i=0; i < count; ++i) { - if (notNull[position + i]) { - data[position + i] = value; - consumed += 1; - } - } - } else { - memset(data + position, value, count); - consumed = count; - } - } else { - if (notNull) { - for(uint64_t i=0; i < count; ++i) { - if (notNull[position + i]) { - data[position + i] = readByte(); - consumed += 1; - } - } - } else { - uint64_t i = 0; - while (i < count) { - if (bufferStart == bufferEnd) { - nextBuffer(); - } - uint64_t copyBytes = - std::min(static_cast<uint64_t>(count - i), - static_cast<uint64_t>(bufferEnd - bufferStart)); - memcpy(data + position + i, bufferStart, copyBytes); - bufferStart += copyBytes; - i += copyBytes; - } - consumed = count; - } - } - remainingValues -= consumed; - position += count; - // skip over any null values - while (notNull && position < numValues && !notNull[position]) { - position += 1; - } - } - } - - std::unique_ptr<ByteRleDecoder> createByteRleDecoder - (std::unique_ptr<SeekableInputStream> input) { - return std::unique_ptr<ByteRleDecoder>(new ByteRleDecoderImpl - (std::move(input))); - } - - class BooleanRleDecoderImpl: public ByteRleDecoderImpl { - public: - BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); - - virtual ~BooleanRleDecoderImpl(); - - /** - * Seek to a particular spot. - */ - virtual void seek(PositionProvider&); - - /** - * Seek over a given number of values. - */ - virtual void skip(uint64_t numValues); - - /** - * Read a number of values into the batch. - */ - virtual void next(char* data, uint64_t numValues, char* notNull); - - protected: - size_t remainingBits; - char lastByte; - }; - - BooleanRleDecoderImpl::BooleanRleDecoderImpl - (std::unique_ptr<SeekableInputStream> input - ): ByteRleDecoderImpl(std::move(input)) { - remainingBits = 0; - lastByte = 0; - } - - BooleanRleDecoderImpl::~BooleanRleDecoderImpl() { - // PASS - } - - void BooleanRleDecoderImpl::seek(PositionProvider& location) { - ByteRleDecoderImpl::seek(location); - uint64_t consumed = location.next(); - remainingBits = 0; - if (consumed > 8) { - throw ParseError("bad position"); - } - if (consumed != 0) { - remainingBits = 8 - consumed; - ByteRleDecoderImpl::next(&lastByte, 1, nullptr); - } - } - - void BooleanRleDecoderImpl::skip(uint64_t numValues) { - if (numValues <= remainingBits) { - remainingBits -= numValues; - } else { - numValues -= remainingBits; - uint64_t bytesSkipped = numValues / 8; - ByteRleDecoderImpl::skip(bytesSkipped); - if (numValues % 8 != 0) { - ByteRleDecoderImpl::next(&lastByte, 1, nullptr); - remainingBits = 8 - (numValues % 8); - } else { - remainingBits = 0; - } - } - } - - void BooleanRleDecoderImpl::next(char* data, uint64_t numValues, - char* notNull) { - // next spot to fill in - uint64_t position = 0; - - // use up any remaining bits - if (notNull) { - while(remainingBits > 0 && position < numValues) { - if (notNull[position]) { - remainingBits -= 1; - data[position] = (static_cast<unsigned char>(lastByte) >> - remainingBits) & 0x1; - } else { - data[position] = 0; - } - position += 1; - } - } else { - while(remainingBits > 0 && position < numValues) { - remainingBits -= 1; - data[position++] = (static_cast<unsigned char>(lastByte) >> - remainingBits) & 0x1; - } - } - - // count the number of nonNulls remaining - uint64_t nonNulls = numValues - position; - if (notNull) { - for(uint64_t i=position; i < numValues; ++i) { - if 
(!notNull[i]) { - nonNulls -= 1; - } - } - } - - // fill in the remaining values - if (nonNulls == 0) { - while (position < numValues) { - data[position++] = 0; - } - } else if (position < numValues) { - // read the new bytes into the array - uint64_t bytesRead = (nonNulls + 7) / 8; - ByteRleDecoderImpl::next(data + position, bytesRead, nullptr); - lastByte = data[position + bytesRead - 1]; - remainingBits = bytesRead * 8 - nonNulls; - // expand the array backwards so that we don't clobber the data - uint64_t bitsLeft = bytesRead * 8 - remainingBits; - if (notNull) { - for(int64_t i=static_cast<int64_t>(numValues) - 1; - i >= static_cast<int64_t>(position); --i) { - if (notNull[i]) { - uint64_t shiftPosn = (-bitsLeft) % 8; - data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1; - bitsLeft -= 1; - } else { - data[i] = 0; - } - } - } else { - for(int64_t i=static_cast<int64_t>(numValues) - 1; - i >= static_cast<int64_t>(position); --i, --bitsLeft) { - uint64_t shiftPosn = (-bitsLeft) % 8; - data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1; - } - } - } - } - - std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder - (std::unique_ptr<SeekableInputStream> input) { - BooleanRleDecoderImpl* decoder = - new BooleanRleDecoderImpl(std::move(input)); - return std::unique_ptr<ByteRleDecoder>( - reinterpret_cast<ByteRleDecoder*>(decoder)); - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <algorithm> +#include <iostream> +#include <string.h> +#include <utility> + +#include "ByteRLE.hh" +#include "orc/Exceptions.hh" + +namespace orc { + + const int MINIMUM_REPEAT = 3; + const int MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT; + const int MAX_LITERAL_SIZE = 128; + + ByteRleEncoder::~ByteRleEncoder() { + // PASS + } + + class ByteRleEncoderImpl : public ByteRleEncoder { + public: + ByteRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output); + virtual ~ByteRleEncoderImpl() override; + + /** + * Encode the next batch of values. + * @param data to be encoded + * @param numValues the number of values to be encoded + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void add(const char* data, uint64_t numValues, + const char* notNull) override; + + /** + * Get size of buffer used so far. + */ + virtual uint64_t getBufferSize() const override; + + /** + * Flush underlying BufferedOutputStream. 
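     * Returns the data size reported by BufferedOutputStream::flush()
     * after any pending run has been written out (see the definition below).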
+ */ + virtual uint64_t flush() override; + + virtual void recordPosition(PositionRecorder* recorder) const override; + + protected: + std::unique_ptr<BufferedOutputStream> outputStream; + char* literals; + int numLiterals; + bool repeat; + int tailRunLength; + int bufferPosition; + int bufferLength; + char* buffer; + + void writeByte(char c); + void writeValues(); + void write(char c); + }; + + ByteRleEncoderImpl::ByteRleEncoderImpl( + std::unique_ptr<BufferedOutputStream> output) + : outputStream(std::move(output)) { + literals = new char[MAX_LITERAL_SIZE]; + numLiterals = 0; + tailRunLength = 0; + repeat = false; + bufferPosition = 0; + bufferLength = 0; + buffer = nullptr; + } + + ByteRleEncoderImpl::~ByteRleEncoderImpl() { + // PASS + delete [] literals; + } + + void ByteRleEncoderImpl::writeByte(char c) { + if (bufferPosition == bufferLength) { + int addedSize = 0; + if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) { + throw std::bad_alloc(); + } + bufferPosition = 0; + bufferLength = addedSize; + } + buffer[bufferPosition++] = c; + } + + void ByteRleEncoderImpl::add( + const char* data, + uint64_t numValues, + const char* notNull) { + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + write(data[i]); + } + } + } + + void ByteRleEncoderImpl::writeValues() { + if (numLiterals != 0) { + if (repeat) { + writeByte( + static_cast<char>(numLiterals - static_cast<int>(MINIMUM_REPEAT))); + writeByte(literals[0]); + } else { + writeByte(static_cast<char>(-numLiterals)); + for (int i = 0; i < numLiterals; ++i) { + writeByte(literals[i]); + } + } + repeat = false; + tailRunLength = 0; + numLiterals = 0; + } + } + + uint64_t ByteRleEncoderImpl::flush() { + writeValues(); + outputStream->BackUp(bufferLength - bufferPosition); + uint64_t dataSize = outputStream->flush(); + bufferLength = bufferPosition = 0; + return dataSize; + } + + void ByteRleEncoderImpl::write(char value) { + if (numLiterals == 0) { + literals[numLiterals++] = value; + tailRunLength = 1; + } else if (repeat) { + if (value == literals[0]) { + numLiterals += 1; + if (numLiterals == MAXIMUM_REPEAT) { + writeValues(); + } + } else { + writeValues(); + literals[numLiterals++] = value; + tailRunLength = 1; + } + } else { + if (value == literals[numLiterals - 1]) { + tailRunLength += 1; + } else { + tailRunLength = 1; + } + if (tailRunLength == MINIMUM_REPEAT) { + if (numLiterals + 1 == MINIMUM_REPEAT) { + repeat = true; + numLiterals += 1; + } else { + numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1); + writeValues(); + literals[0] = value; + repeat = true; + numLiterals = MINIMUM_REPEAT; + } + } else { + literals[numLiterals++] = value; + if (numLiterals == MAX_LITERAL_SIZE) { + writeValues(); + } + } + } + } + + uint64_t ByteRleEncoderImpl::getBufferSize() const { + return outputStream->getSize(); + } + + void ByteRleEncoderImpl::recordPosition(PositionRecorder *recorder) const { + uint64_t flushedSize = outputStream->getSize(); + uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition); + if (outputStream->isCompressed()) { + // start of the compression chunk in the stream + recorder->add(flushedSize); + // number of decompressed bytes that need to be consumed + recorder->add(unflushedSize); + } else { + flushedSize -= static_cast<uint64_t>(bufferLength); + // byte offset of the RLE run’s start location + recorder->add(flushedSize + unflushedSize); + } + recorder->add(static_cast<uint64_t>(numLiterals)); + } + + std::unique_ptr<ByteRleEncoder> createByteRleEncoder + 
(std::unique_ptr<BufferedOutputStream> output) { + return std::unique_ptr<ByteRleEncoder>(new ByteRleEncoderImpl + (std::move(output))); + } + + class BooleanRleEncoderImpl : public ByteRleEncoderImpl { + public: + BooleanRleEncoderImpl(std::unique_ptr<BufferedOutputStream> output); + virtual ~BooleanRleEncoderImpl() override; + + /** + * Encode the next batch of values + * @param data to be encoded + * @param numValues the number of values to be encoded + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void add(const char* data, uint64_t numValues, + const char* notNull) override; + + /** + * Flushing underlying BufferedOutputStream + */ + virtual uint64_t flush() override; + + virtual void recordPosition(PositionRecorder* recorder) const override; + + private: + int bitsRemained; + char current; + + }; + + BooleanRleEncoderImpl::BooleanRleEncoderImpl( + std::unique_ptr<BufferedOutputStream> output) + : ByteRleEncoderImpl(std::move(output)) { + bitsRemained = 8; + current = static_cast<char>(0); + } + + BooleanRleEncoderImpl::~BooleanRleEncoderImpl() { + // PASS + } + + void BooleanRleEncoderImpl::add( + const char* data, + uint64_t numValues, + const char* notNull) { + for (uint64_t i = 0; i < numValues; ++i) { + if (bitsRemained == 0) { + write(current); + current = static_cast<char>(0); + bitsRemained = 8; + } + if (!notNull || notNull[i]) { + if (!data || data[i]) { + current = + static_cast<char>(current | (0x80 >> (8 - bitsRemained))); + } + --bitsRemained; + } + } + if (bitsRemained == 0) { + write(current); + current = static_cast<char>(0); + bitsRemained = 8; + } + } + + uint64_t BooleanRleEncoderImpl::flush() { + if (bitsRemained != 8) { + write(current); + } + bitsRemained = 8; + current = static_cast<char>(0); + return ByteRleEncoderImpl::flush(); + } + + void BooleanRleEncoderImpl::recordPosition(PositionRecorder* recorder) const { + ByteRleEncoderImpl::recordPosition(recorder); + recorder->add(static_cast<uint64_t>(8 - bitsRemained)); + } + + std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder + (std::unique_ptr<BufferedOutputStream> output) { + BooleanRleEncoderImpl* encoder = + new BooleanRleEncoderImpl(std::move(output)) ; + return std::unique_ptr<ByteRleEncoder>( + reinterpret_cast<ByteRleEncoder*>(encoder)); + } + + ByteRleDecoder::~ByteRleDecoder() { + // PASS + } + + class ByteRleDecoderImpl: public ByteRleDecoder { + public: + ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); + + virtual ~ByteRleDecoderImpl(); + + /** + * Seek to a particular spot. + */ + virtual void seek(PositionProvider&); + + /** + * Seek over a given number of values. + */ + virtual void skip(uint64_t numValues); + + /** + * Read a number of values into the batch. 
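     * @param data the array to read into
     * @param numValues the number of values to read
     * @param notNull if the pointer is null, all values are read; if it is
     *                not null, positions that are false are skipped
     *                (same contract as ByteRleDecoder in ByteRLE.hh)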
+ */ + virtual void next(char* data, uint64_t numValues, char* notNull); + + protected: + inline void nextBuffer(); + inline signed char readByte(); + inline void readHeader(); + + std::unique_ptr<SeekableInputStream> inputStream; + size_t remainingValues; + char value; + const char* bufferStart; + const char* bufferEnd; + bool repeating; + }; + + void ByteRleDecoderImpl::nextBuffer() { + int bufferLength; + const void* bufferPointer; + bool result = inputStream->Next(&bufferPointer, &bufferLength); + if (!result) { + throw ParseError("bad read in nextBuffer"); + } + bufferStart = static_cast<const char*>(bufferPointer); + bufferEnd = bufferStart + bufferLength; + } + + signed char ByteRleDecoderImpl::readByte() { + if (bufferStart == bufferEnd) { + nextBuffer(); + } + return *(bufferStart++); + } + + void ByteRleDecoderImpl::readHeader() { + signed char ch = readByte(); + if (ch < 0) { + remainingValues = static_cast<size_t>(-ch); + repeating = false; + } else { + remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT; + repeating = true; + value = readByte(); + } + } + + ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> + input) { + inputStream = std::move(input); + repeating = false; + remainingValues = 0; + value = 0; + bufferStart = nullptr; + bufferEnd = nullptr; + } + + ByteRleDecoderImpl::~ByteRleDecoderImpl() { + // PASS + } + + void ByteRleDecoderImpl::seek(PositionProvider& location) { + // move the input stream + inputStream->seek(location); + // force a re-read from the stream + bufferEnd = bufferStart; + // read a new header + readHeader(); + // skip ahead the given number of records + ByteRleDecoderImpl::skip(location.next()); + } + + void ByteRleDecoderImpl::skip(uint64_t numValues) { + while (numValues > 0) { + if (remainingValues == 0) { + readHeader(); + } + size_t count = std::min(static_cast<size_t>(numValues), remainingValues); + remainingValues -= count; + numValues -= count; + // for literals we need to skip over count bytes, which may involve + // reading from the underlying stream + if (!repeating) { + size_t consumedBytes = count; + while (consumedBytes > 0) { + if (bufferStart == bufferEnd) { + nextBuffer(); + } + size_t skipSize = std::min(static_cast<size_t>(consumedBytes), + static_cast<size_t>(bufferEnd - + bufferStart)); + bufferStart += skipSize; + consumedBytes -= skipSize; + } + } + } + } + + void ByteRleDecoderImpl::next(char* data, uint64_t numValues, + char* notNull) { + uint64_t position = 0; + // skip over null values + while (notNull && position < numValues && !notNull[position]) { + position += 1; + } + while (position < numValues) { + // if we are out of values, read more + if (remainingValues == 0) { + readHeader(); + } + // how many do we read out of this block? 
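      // (remainingValues was set by readHeader(): a control byte n >= 0 means
      // a run of n + MINIMUM_REPEAT copies of one value, a negative byte -n
      // means n literal bytes follow, so a single run never spans headers)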
+ size_t count = std::min(static_cast<size_t>(numValues - position), + remainingValues); + uint64_t consumed = 0; + if (repeating) { + if (notNull) { + for(uint64_t i=0; i < count; ++i) { + if (notNull[position + i]) { + data[position + i] = value; + consumed += 1; + } + } + } else { + memset(data + position, value, count); + consumed = count; + } + } else { + if (notNull) { + for(uint64_t i=0; i < count; ++i) { + if (notNull[position + i]) { + data[position + i] = readByte(); + consumed += 1; + } + } + } else { + uint64_t i = 0; + while (i < count) { + if (bufferStart == bufferEnd) { + nextBuffer(); + } + uint64_t copyBytes = + std::min(static_cast<uint64_t>(count - i), + static_cast<uint64_t>(bufferEnd - bufferStart)); + memcpy(data + position + i, bufferStart, copyBytes); + bufferStart += copyBytes; + i += copyBytes; + } + consumed = count; + } + } + remainingValues -= consumed; + position += count; + // skip over any null values + while (notNull && position < numValues && !notNull[position]) { + position += 1; + } + } + } + + std::unique_ptr<ByteRleDecoder> createByteRleDecoder + (std::unique_ptr<SeekableInputStream> input) { + return std::unique_ptr<ByteRleDecoder>(new ByteRleDecoderImpl + (std::move(input))); + } + + class BooleanRleDecoderImpl: public ByteRleDecoderImpl { + public: + BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); + + virtual ~BooleanRleDecoderImpl(); + + /** + * Seek to a particular spot. + */ + virtual void seek(PositionProvider&); + + /** + * Seek over a given number of values. + */ + virtual void skip(uint64_t numValues); + + /** + * Read a number of values into the batch. + */ + virtual void next(char* data, uint64_t numValues, char* notNull); + + protected: + size_t remainingBits; + char lastByte; + }; + + BooleanRleDecoderImpl::BooleanRleDecoderImpl + (std::unique_ptr<SeekableInputStream> input + ): ByteRleDecoderImpl(std::move(input)) { + remainingBits = 0; + lastByte = 0; + } + + BooleanRleDecoderImpl::~BooleanRleDecoderImpl() { + // PASS + } + + void BooleanRleDecoderImpl::seek(PositionProvider& location) { + ByteRleDecoderImpl::seek(location); + uint64_t consumed = location.next(); + remainingBits = 0; + if (consumed > 8) { + throw ParseError("bad position"); + } + if (consumed != 0) { + remainingBits = 8 - consumed; + ByteRleDecoderImpl::next(&lastByte, 1, nullptr); + } + } + + void BooleanRleDecoderImpl::skip(uint64_t numValues) { + if (numValues <= remainingBits) { + remainingBits -= numValues; + } else { + numValues -= remainingBits; + uint64_t bytesSkipped = numValues / 8; + ByteRleDecoderImpl::skip(bytesSkipped); + if (numValues % 8 != 0) { + ByteRleDecoderImpl::next(&lastByte, 1, nullptr); + remainingBits = 8 - (numValues % 8); + } else { + remainingBits = 0; + } + } + } + + void BooleanRleDecoderImpl::next(char* data, uint64_t numValues, + char* notNull) { + // next spot to fill in + uint64_t position = 0; + + // use up any remaining bits + if (notNull) { + while(remainingBits > 0 && position < numValues) { + if (notNull[position]) { + remainingBits -= 1; + data[position] = (static_cast<unsigned char>(lastByte) >> + remainingBits) & 0x1; + } else { + data[position] = 0; + } + position += 1; + } + } else { + while(remainingBits > 0 && position < numValues) { + remainingBits -= 1; + data[position++] = (static_cast<unsigned char>(lastByte) >> + remainingBits) & 0x1; + } + } + + // count the number of nonNulls remaining + uint64_t nonNulls = numValues - position; + if (notNull) { + for(uint64_t i=position; i < numValues; ++i) { + if 
(!notNull[i]) { + nonNulls -= 1; + } + } + } + + // fill in the remaining values + if (nonNulls == 0) { + while (position < numValues) { + data[position++] = 0; + } + } else if (position < numValues) { + // read the new bytes into the array + uint64_t bytesRead = (nonNulls + 7) / 8; + ByteRleDecoderImpl::next(data + position, bytesRead, nullptr); + lastByte = data[position + bytesRead - 1]; + remainingBits = bytesRead * 8 - nonNulls; + // expand the array backwards so that we don't clobber the data + uint64_t bitsLeft = bytesRead * 8 - remainingBits; + if (notNull) { + for(int64_t i=static_cast<int64_t>(numValues) - 1; + i >= static_cast<int64_t>(position); --i) { + if (notNull[i]) { + uint64_t shiftPosn = (-bitsLeft) % 8; + data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1; + bitsLeft -= 1; + } else { + data[i] = 0; + } + } + } else { + for(int64_t i=static_cast<int64_t>(numValues) - 1; + i >= static_cast<int64_t>(position); --i, --bitsLeft) { + uint64_t shiftPosn = (-bitsLeft) % 8; + data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1; + } + } + } + } + + std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder + (std::unique_ptr<SeekableInputStream> input) { + BooleanRleDecoderImpl* decoder = + new BooleanRleDecoderImpl(std::move(input)); + return std::unique_ptr<ByteRleDecoder>( + reinterpret_cast<ByteRleDecoder*>(decoder)); + } +} diff --git a/contrib/libs/apache/orc/c++/src/ByteRLE.hh b/contrib/libs/apache/orc/c++/src/ByteRLE.hh index b799675aee..71ca579cd3 100644 --- a/contrib/libs/apache/orc/c++/src/ByteRLE.hh +++ b/contrib/libs/apache/orc/c++/src/ByteRLE.hh @@ -1,117 +1,117 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_BYTE_RLE_HH -#define ORC_BYTE_RLE_HH - -#include <memory> - -#include "io/InputStream.hh" -#include "io/OutputStream.hh" - -namespace orc { - - class ByteRleEncoder { - public: - virtual ~ByteRleEncoder(); - - /** - * Encode the next batch of values - * @param data to be encoded - * @param numValues the number of values to be encoded - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void add(const char* data, uint64_t numValues, - const char* notNull) = 0; - - /** - * Get size of buffer used so far. - */ - virtual uint64_t getBufferSize() const = 0; - - /** - * Flushing underlying output stream - */ - virtual uint64_t flush() = 0; - - /** - * record current position - * @param recorder use the recorder to record current positions - */ - virtual void recordPosition(PositionRecorder* recorder) const = 0; - }; - - class ByteRleDecoder { - public: - virtual ~ByteRleDecoder(); - - /** - * Seek to a particular spot. 
- */ - virtual void seek(PositionProvider&) = 0; - - /** - * Seek over a given number of values. - */ - virtual void skip(uint64_t numValues) = 0; - - /** - * Read a number of values into the batch. - * @param data the array to read into - * @param numValues the number of values to read - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void next(char* data, uint64_t numValues, char* notNull) = 0; - }; - - /** - * Create a byte RLE encoder. - * @param output the output stream to write to - */ - std::unique_ptr<ByteRleEncoder> createByteRleEncoder - (std::unique_ptr<BufferedOutputStream> output); - - /** - * Create a boolean RLE encoder. - * @param output the output stream to write to - */ - std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder - (std::unique_ptr<BufferedOutputStream> output); - - /** - * Create a byte RLE decoder. - * @param input the input stream to read from - */ - std::unique_ptr<ByteRleDecoder> createByteRleDecoder - (std::unique_ptr<SeekableInputStream> input); - - /** - * Create a boolean RLE decoder. - * - * Unlike the other RLE decoders, the boolean decoder sets the data to 0 - * if the value is masked by notNull. This is required for the notNull stream - * processing to properly apply multiple masks from nested types. - * @param input the input stream to read from - */ - std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder - (std::unique_ptr<SeekableInputStream> input); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_BYTE_RLE_HH +#define ORC_BYTE_RLE_HH + +#include <memory> + +#include "io/InputStream.hh" +#include "io/OutputStream.hh" + +namespace orc { + + class ByteRleEncoder { + public: + virtual ~ByteRleEncoder(); + + /** + * Encode the next batch of values + * @param data to be encoded + * @param numValues the number of values to be encoded + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void add(const char* data, uint64_t numValues, + const char* notNull) = 0; + + /** + * Get size of buffer used so far. + */ + virtual uint64_t getBufferSize() const = 0; + + /** + * Flushing underlying output stream + */ + virtual uint64_t flush() = 0; + + /** + * record current position + * @param recorder use the recorder to record current positions + */ + virtual void recordPosition(PositionRecorder* recorder) const = 0; + }; + + class ByteRleDecoder { + public: + virtual ~ByteRleDecoder(); + + /** + * Seek to a particular spot. + */ + virtual void seek(PositionProvider&) = 0; + + /** + * Seek over a given number of values. 
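     * @param numValues the number of values to skip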
+ */ + virtual void skip(uint64_t numValues) = 0; + + /** + * Read a number of values into the batch. + * @param data the array to read into + * @param numValues the number of values to read + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void next(char* data, uint64_t numValues, char* notNull) = 0; + }; + + /** + * Create a byte RLE encoder. + * @param output the output stream to write to + */ + std::unique_ptr<ByteRleEncoder> createByteRleEncoder + (std::unique_ptr<BufferedOutputStream> output); + + /** + * Create a boolean RLE encoder. + * @param output the output stream to write to + */ + std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder + (std::unique_ptr<BufferedOutputStream> output); + + /** + * Create a byte RLE decoder. + * @param input the input stream to read from + */ + std::unique_ptr<ByteRleDecoder> createByteRleDecoder + (std::unique_ptr<SeekableInputStream> input); + + /** + * Create a boolean RLE decoder. + * + * Unlike the other RLE decoders, the boolean decoder sets the data to 0 + * if the value is masked by notNull. This is required for the notNull stream + * processing to properly apply multiple masks from nested types. + * @param input the input stream to read from + */ + std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder + (std::unique_ptr<SeekableInputStream> input); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc b/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc index 91c2904038..b4b5860cad 100644 --- a/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc +++ b/contrib/libs/apache/orc/c++/src/ColumnPrinter.cc @@ -1,747 +1,747 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/ColumnPrinter.hh" -#include "orc/orc-config.hh" - -#include "Adaptor.hh" - -#include <limits> -#include <sstream> -#include <stdexcept> -#include <time.h> -#include <typeinfo> - -#ifdef __clang__ - #pragma clang diagnostic ignored "-Wformat-security" -#endif - -namespace orc { - - class VoidColumnPrinter: public ColumnPrinter { - public: - VoidColumnPrinter(std::string&); - ~VoidColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class BooleanColumnPrinter: public ColumnPrinter { - private: - const int64_t* data; - public: - BooleanColumnPrinter(std::string&); - ~BooleanColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class LongColumnPrinter: public ColumnPrinter { - private: - const int64_t* data; - public: - LongColumnPrinter(std::string&); - ~LongColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class DoubleColumnPrinter: public ColumnPrinter { - private: - const double* data; - const bool isFloat; - - public: - DoubleColumnPrinter(std::string&, const Type& type); - virtual ~DoubleColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class TimestampColumnPrinter: public ColumnPrinter { - private: - const int64_t* seconds; - const int64_t* nanoseconds; - - public: - TimestampColumnPrinter(std::string&); - ~TimestampColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class DateColumnPrinter: public ColumnPrinter { - private: - const int64_t* data; - - public: - DateColumnPrinter(std::string&); - ~DateColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class Decimal64ColumnPrinter: public ColumnPrinter { - private: - const int64_t* data; - int32_t scale; - public: - Decimal64ColumnPrinter(std::string&); - ~Decimal64ColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class Decimal128ColumnPrinter: public ColumnPrinter { - private: - const Int128* data; - int32_t scale; - public: - Decimal128ColumnPrinter(std::string&); - ~Decimal128ColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class StringColumnPrinter: public ColumnPrinter { - private: - const char* const * start; - const int64_t* length; - public: - StringColumnPrinter(std::string&); - virtual ~StringColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class BinaryColumnPrinter: public ColumnPrinter { - private: - const char* const * start; - const int64_t* length; - public: - BinaryColumnPrinter(std::string&); - virtual ~BinaryColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class ListColumnPrinter: public ColumnPrinter { - private: - const int64_t* offsets; - std::unique_ptr<ColumnPrinter> elementPrinter; - - public: - ListColumnPrinter(std::string&, const Type& type); - virtual ~ListColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) 
override; - }; - - class MapColumnPrinter: public ColumnPrinter { - private: - const int64_t* offsets; - std::unique_ptr<ColumnPrinter> keyPrinter; - std::unique_ptr<ColumnPrinter> elementPrinter; - - public: - MapColumnPrinter(std::string&, const Type& type); - virtual ~MapColumnPrinter() override {} - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class UnionColumnPrinter: public ColumnPrinter { - private: - const unsigned char *tags; - const uint64_t* offsets; - std::vector<ColumnPrinter*> fieldPrinter; - - public: - UnionColumnPrinter(std::string&, const Type& type); - virtual ~UnionColumnPrinter() override; - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - class StructColumnPrinter: public ColumnPrinter { - private: - std::vector<ColumnPrinter*> fieldPrinter; - std::vector<std::string> fieldNames; - public: - StructColumnPrinter(std::string&, const Type& type); - virtual ~StructColumnPrinter() override; - void printRow(uint64_t rowId) override; - void reset(const ColumnVectorBatch& batch) override; - }; - - void writeChar(std::string& file, char ch) { - file += ch; - } - - void writeString(std::string& file, const char *ptr) { - size_t len = strlen(ptr); - file.append(ptr, len); - } - - ColumnPrinter::ColumnPrinter(std::string& _buffer - ): buffer(_buffer) { - notNull = nullptr; - hasNulls = false; - } - - ColumnPrinter::~ColumnPrinter() { - // PASS - } - - void ColumnPrinter::reset(const ColumnVectorBatch& batch) { - hasNulls = batch.hasNulls; - if (hasNulls) { - notNull = batch.notNull.data(); - } else { - notNull = nullptr ; - } - } - - std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer, - const Type* type) { - ColumnPrinter *result = nullptr; - if (type == nullptr) { - result = new VoidColumnPrinter(buffer); - } else { - switch(static_cast<int64_t>(type->getKind())) { - case BOOLEAN: - result = new BooleanColumnPrinter(buffer); - break; - - case BYTE: - case SHORT: - case INT: - case LONG: - result = new LongColumnPrinter(buffer); - break; - - case FLOAT: - case DOUBLE: - result = new DoubleColumnPrinter(buffer, *type); - break; - - case STRING: - case VARCHAR : - case CHAR: - result = new StringColumnPrinter(buffer); - break; - - case BINARY: - result = new BinaryColumnPrinter(buffer); - break; - - case TIMESTAMP: - result = new TimestampColumnPrinter(buffer); - break; - - case LIST: - result = new ListColumnPrinter(buffer, *type); - break; - - case MAP: - result = new MapColumnPrinter(buffer, *type); - break; - - case STRUCT: - result = new StructColumnPrinter(buffer, *type); - break; - - case DECIMAL: - if (type->getPrecision() == 0 || type->getPrecision() > 18) { - result = new Decimal128ColumnPrinter(buffer); - } else { - result = new Decimal64ColumnPrinter(buffer); - } - break; - - case DATE: - result = new DateColumnPrinter(buffer); - break; - - case UNION: - result = new UnionColumnPrinter(buffer, *type); - break; - - default: - throw std::logic_error("unknown batch type"); - } - } - return std::unique_ptr<ColumnPrinter>(result); - } - - VoidColumnPrinter::VoidColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer) { - // PASS - } - - void VoidColumnPrinter::reset(const ColumnVectorBatch&) { - // PASS - } - - void VoidColumnPrinter::printRow(uint64_t) { - writeString(buffer, "null"); - } - - LongColumnPrinter::LongColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - data(nullptr) { - // PASS - } - - void 
LongColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); - } - - void LongColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d", - static_cast<int64_t >(data[rowId])); - writeString(buffer, numBuffer); - } - } - - DoubleColumnPrinter::DoubleColumnPrinter(std::string& _buffer, - const Type& type - ): ColumnPrinter(_buffer), - data(nullptr), - isFloat(type.getKind() == FLOAT){ - // PASS - } - - void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const DoubleVectorBatch&>(batch).data.data(); - } - - void DoubleColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), isFloat ? "%.7g" : "%.14g", - data[rowId]); - writeString(buffer, numBuffer); - } - } - - Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - data(nullptr), - scale(0) { - // PASS - } - - void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const Decimal64VectorBatch&>(batch).values.data(); - scale = dynamic_cast<const Decimal64VectorBatch&>(batch).scale; - } - - std::string toDecimalString(int64_t value, int32_t scale) { - std::stringstream buffer; - if (scale == 0) { - buffer << value; - return buffer.str(); - } - std::string sign = ""; - if (value < 0) { - sign = "-"; - value = -value; - } - buffer << value; - std::string str = buffer.str(); - int32_t len = static_cast<int32_t>(str.length()); - if (len > scale) { - return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." + - str.substr(static_cast<size_t>(len - scale), - static_cast<size_t>(scale)); - } else if (len == scale) { - return sign + "0." 
+ str; - } else { - std::string result = sign + "0."; - for(int32_t i=0; i < scale - len; ++i) { - result += "0"; - } - return result + str; - } - } - - void Decimal64ColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeString(buffer, toDecimalString(data[rowId], scale).c_str()); - } - } - - Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - data(nullptr), - scale(0) { - // PASS - } - - void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data(); - scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale; - } - - void Decimal128ColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeString(buffer, data[rowId].toDecimalString(scale).c_str()); - } - } - - StringColumnPrinter::StringColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - start(nullptr), - length(nullptr) { - // PASS - } - - void StringColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - start = dynamic_cast<const StringVectorBatch&>(batch).data.data(); - length = dynamic_cast<const StringVectorBatch&>(batch).length.data(); - } - - void StringColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeChar(buffer, '"'); - for(int64_t i=0; i < length[rowId]; ++i) { - char ch = static_cast<char>(start[rowId][i]); - switch (ch) { - case '\\': - writeString(buffer, "\\\\"); - break; - case '\b': - writeString(buffer, "\\b"); - break; - case '\f': - writeString(buffer, "\\f"); - break; - case '\n': - writeString(buffer, "\\n"); - break; - case '\r': - writeString(buffer, "\\r"); - break; - case '\t': - writeString(buffer, "\\t"); - break; - case '"': - writeString(buffer, "\\\""); - break; - default: - writeChar(buffer, ch); - break; - } - } - writeChar(buffer, '"'); - } - } - - ListColumnPrinter::ListColumnPrinter(std::string& _buffer, - const Type& type - ): ColumnPrinter(_buffer), - offsets(nullptr) { - elementPrinter = createColumnPrinter(buffer, type.getSubtype(0)); - } - - void ListColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - offsets = dynamic_cast<const ListVectorBatch&>(batch).offsets.data(); - elementPrinter->reset(*dynamic_cast<const ListVectorBatch&>(batch). 
- elements); - } - - void ListColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeChar(buffer, '['); - for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) { - if (i != offsets[rowId]) { - writeString(buffer, ", "); - } - elementPrinter->printRow(static_cast<uint64_t>(i)); - } - writeChar(buffer, ']'); - } - } - - MapColumnPrinter::MapColumnPrinter(std::string& _buffer, - const Type& type - ): ColumnPrinter(_buffer), - offsets(nullptr) { - keyPrinter = createColumnPrinter(buffer, type.getSubtype(0)); - elementPrinter = createColumnPrinter(buffer, type.getSubtype(1)); - } - - void MapColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - const MapVectorBatch& myBatch = dynamic_cast<const MapVectorBatch&>(batch); - offsets = myBatch.offsets.data(); - keyPrinter->reset(*myBatch.keys); - elementPrinter->reset(*myBatch.elements); - } - - void MapColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeChar(buffer, '['); - for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) { - if (i != offsets[rowId]) { - writeString(buffer, ", "); - } - writeString(buffer, "{\"key\": "); - keyPrinter->printRow(static_cast<uint64_t>(i)); - writeString(buffer, ", \"value\": "); - elementPrinter->printRow(static_cast<uint64_t>(i)); - writeChar(buffer, '}'); - } - writeChar(buffer, ']'); - } - } - - UnionColumnPrinter::UnionColumnPrinter(std::string& _buffer, - const Type& type - ): ColumnPrinter(_buffer), - tags(nullptr), - offsets(nullptr) { - for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { - fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i)) - .release()); - } - } - - UnionColumnPrinter::~UnionColumnPrinter() { - for (size_t i = 0; i < fieldPrinter.size(); i++) { - delete fieldPrinter[i]; - } - } - - void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - const UnionVectorBatch& unionBatch = - dynamic_cast<const UnionVectorBatch&>(batch); - tags = unionBatch.tags.data(); - offsets = unionBatch.offsets.data(); - for(size_t i=0; i < fieldPrinter.size(); ++i) { - fieldPrinter[i]->reset(*(unionBatch.children[i])); - } - } - - void UnionColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeString(buffer, "{\"tag\": "); - char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d", - static_cast<int64_t>(tags[rowId])); - writeString(buffer, numBuffer); - writeString(buffer, ", \"value\": "); - fieldPrinter[tags[rowId]]->printRow(offsets[rowId]); - writeChar(buffer, '}'); - } - } - - StructColumnPrinter::StructColumnPrinter(std::string& _buffer, - const Type& type - ): ColumnPrinter(_buffer) { - for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { - fieldNames.push_back(type.getFieldName(i)); - fieldPrinter.push_back(createColumnPrinter(buffer, - type.getSubtype(i)) - .release()); - } - } - - StructColumnPrinter::~StructColumnPrinter() { - for (size_t i = 0; i < fieldPrinter.size(); i++) { - delete fieldPrinter[i]; - } - } - - void StructColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - const StructVectorBatch& structBatch = - dynamic_cast<const StructVectorBatch&>(batch); - for(size_t i=0; i < fieldPrinter.size(); ++i) { - fieldPrinter[i]->reset(*(structBatch.fields[i])); - } - } - - void 
StructColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeChar(buffer, '{'); - for(unsigned int i=0; i < fieldPrinter.size(); ++i) { - if (i != 0) { - writeString(buffer, ", "); - } - writeChar(buffer, '"'); - writeString(buffer, fieldNames[i].c_str()); - writeString(buffer, "\": "); - fieldPrinter[i]->printRow(rowId); - } - writeChar(buffer, '}'); - } - } - - DateColumnPrinter::DateColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - data(nullptr) { - // PASS - } - - void DateColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - const time_t timeValue = data[rowId] * 24 * 60 * 60; - struct tm tmValue; - gmtime_r(&timeValue, &tmValue); - char timeBuffer[11]; - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d", &tmValue); - writeChar(buffer, '"'); - writeString(buffer, timeBuffer); - writeChar(buffer, '"'); - } - } - - void DateColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); - } - - BooleanColumnPrinter::BooleanColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - data(nullptr) { - // PASS - } - - void BooleanColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeString(buffer, (data[rowId] ? "true" : "false")); - } - } - - void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); - } - - BinaryColumnPrinter::BinaryColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - start(nullptr), - length(nullptr) { - // PASS - } - - void BinaryColumnPrinter::printRow(uint64_t rowId) { - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - writeChar(buffer, '['); - for(int64_t i=0; i < length[rowId]; ++i) { - if (i != 0) { - writeString(buffer, ", "); - } - char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), "%d", - (static_cast<const int>(start[rowId][i]) & 0xff)); - writeString(buffer, numBuffer); - } - writeChar(buffer, ']'); - } - } - - void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - start = dynamic_cast<const StringVectorBatch&>(batch).data.data(); - length = dynamic_cast<const StringVectorBatch&>(batch).length.data(); - } - - TimestampColumnPrinter::TimestampColumnPrinter(std::string& _buffer - ): ColumnPrinter(_buffer), - seconds(nullptr), - nanoseconds(nullptr) { - // PASS - } - - void TimestampColumnPrinter::printRow(uint64_t rowId) { - const int64_t NANO_DIGITS = 9; - if (hasNulls && !notNull[rowId]) { - writeString(buffer, "null"); - } else { - int64_t nanos = nanoseconds[rowId]; - time_t secs = static_cast<time_t>(seconds[rowId]); - struct tm tmValue; - gmtime_r(&secs, &tmValue); - char timeBuffer[20]; - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - writeChar(buffer, '"'); - writeString(buffer, timeBuffer); - writeChar(buffer, '.'); - // remove trailing zeros off the back of the nanos value. 
- int64_t zeroDigits = 0; - if (nanos == 0) { - zeroDigits = 8; - } else { - while (nanos % 10 == 0) { - nanos /= 10; - zeroDigits += 1; - } - } - char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), - "%0*" INT64_FORMAT_STRING "d\"", - static_cast<int>(NANO_DIGITS - zeroDigits), - static_cast<int64_t >(nanos)); - writeString(buffer, numBuffer); - } - } - - void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) { - ColumnPrinter::reset(batch); - const TimestampVectorBatch& ts = - dynamic_cast<const TimestampVectorBatch&>(batch); - seconds = ts.data.data(); - nanoseconds = ts.nanoseconds.data(); - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/ColumnPrinter.hh" +#include "orc/orc-config.hh" + +#include "Adaptor.hh" + +#include <limits> +#include <sstream> +#include <stdexcept> +#include <time.h> +#include <typeinfo> + +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wformat-security" +#endif + +namespace orc { + + class VoidColumnPrinter: public ColumnPrinter { + public: + VoidColumnPrinter(std::string&); + ~VoidColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class BooleanColumnPrinter: public ColumnPrinter { + private: + const int64_t* data; + public: + BooleanColumnPrinter(std::string&); + ~BooleanColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class LongColumnPrinter: public ColumnPrinter { + private: + const int64_t* data; + public: + LongColumnPrinter(std::string&); + ~LongColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class DoubleColumnPrinter: public ColumnPrinter { + private: + const double* data; + const bool isFloat; + + public: + DoubleColumnPrinter(std::string&, const Type& type); + virtual ~DoubleColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class TimestampColumnPrinter: public ColumnPrinter { + private: + const int64_t* seconds; + const int64_t* nanoseconds; + + public: + TimestampColumnPrinter(std::string&); + ~TimestampColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class DateColumnPrinter: public ColumnPrinter { + private: + const int64_t* data; + + public: + DateColumnPrinter(std::string&); + ~DateColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class Decimal64ColumnPrinter: public ColumnPrinter { + private: + const int64_t* data; + int32_t scale; + public: + 
Decimal64ColumnPrinter(std::string&); + ~Decimal64ColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class Decimal128ColumnPrinter: public ColumnPrinter { + private: + const Int128* data; + int32_t scale; + public: + Decimal128ColumnPrinter(std::string&); + ~Decimal128ColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class StringColumnPrinter: public ColumnPrinter { + private: + const char* const * start; + const int64_t* length; + public: + StringColumnPrinter(std::string&); + virtual ~StringColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class BinaryColumnPrinter: public ColumnPrinter { + private: + const char* const * start; + const int64_t* length; + public: + BinaryColumnPrinter(std::string&); + virtual ~BinaryColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class ListColumnPrinter: public ColumnPrinter { + private: + const int64_t* offsets; + std::unique_ptr<ColumnPrinter> elementPrinter; + + public: + ListColumnPrinter(std::string&, const Type& type); + virtual ~ListColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class MapColumnPrinter: public ColumnPrinter { + private: + const int64_t* offsets; + std::unique_ptr<ColumnPrinter> keyPrinter; + std::unique_ptr<ColumnPrinter> elementPrinter; + + public: + MapColumnPrinter(std::string&, const Type& type); + virtual ~MapColumnPrinter() override {} + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class UnionColumnPrinter: public ColumnPrinter { + private: + const unsigned char *tags; + const uint64_t* offsets; + std::vector<ColumnPrinter*> fieldPrinter; + + public: + UnionColumnPrinter(std::string&, const Type& type); + virtual ~UnionColumnPrinter() override; + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + class StructColumnPrinter: public ColumnPrinter { + private: + std::vector<ColumnPrinter*> fieldPrinter; + std::vector<std::string> fieldNames; + public: + StructColumnPrinter(std::string&, const Type& type); + virtual ~StructColumnPrinter() override; + void printRow(uint64_t rowId) override; + void reset(const ColumnVectorBatch& batch) override; + }; + + void writeChar(std::string& file, char ch) { + file += ch; + } + + void writeString(std::string& file, const char *ptr) { + size_t len = strlen(ptr); + file.append(ptr, len); + } + + ColumnPrinter::ColumnPrinter(std::string& _buffer + ): buffer(_buffer) { + notNull = nullptr; + hasNulls = false; + } + + ColumnPrinter::~ColumnPrinter() { + // PASS + } + + void ColumnPrinter::reset(const ColumnVectorBatch& batch) { + hasNulls = batch.hasNulls; + if (hasNulls) { + notNull = batch.notNull.data(); + } else { + notNull = nullptr ; + } + } + + std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer, + const Type* type) { + ColumnPrinter *result = nullptr; + if (type == nullptr) { + result = new VoidColumnPrinter(buffer); + } else { + switch(static_cast<int64_t>(type->getKind())) { + case BOOLEAN: + result = new BooleanColumnPrinter(buffer); + break; + + case BYTE: + case SHORT: + case INT: + case LONG: + result = new LongColumnPrinter(buffer); + 
break; + + case FLOAT: + case DOUBLE: + result = new DoubleColumnPrinter(buffer, *type); + break; + + case STRING: + case VARCHAR : + case CHAR: + result = new StringColumnPrinter(buffer); + break; + + case BINARY: + result = new BinaryColumnPrinter(buffer); + break; + + case TIMESTAMP: + result = new TimestampColumnPrinter(buffer); + break; + + case LIST: + result = new ListColumnPrinter(buffer, *type); + break; + + case MAP: + result = new MapColumnPrinter(buffer, *type); + break; + + case STRUCT: + result = new StructColumnPrinter(buffer, *type); + break; + + case DECIMAL: + if (type->getPrecision() == 0 || type->getPrecision() > 18) { + result = new Decimal128ColumnPrinter(buffer); + } else { + result = new Decimal64ColumnPrinter(buffer); + } + break; + + case DATE: + result = new DateColumnPrinter(buffer); + break; + + case UNION: + result = new UnionColumnPrinter(buffer, *type); + break; + + default: + throw std::logic_error("unknown batch type"); + } + } + return std::unique_ptr<ColumnPrinter>(result); + } + + VoidColumnPrinter::VoidColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer) { + // PASS + } + + void VoidColumnPrinter::reset(const ColumnVectorBatch&) { + // PASS + } + + void VoidColumnPrinter::printRow(uint64_t) { + writeString(buffer, "null"); + } + + LongColumnPrinter::LongColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + data(nullptr) { + // PASS + } + + void LongColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); + } + + void LongColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + char numBuffer[64]; + snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d", + static_cast<int64_t >(data[rowId])); + writeString(buffer, numBuffer); + } + } + + DoubleColumnPrinter::DoubleColumnPrinter(std::string& _buffer, + const Type& type + ): ColumnPrinter(_buffer), + data(nullptr), + isFloat(type.getKind() == FLOAT){ + // PASS + } + + void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const DoubleVectorBatch&>(batch).data.data(); + } + + void DoubleColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + char numBuffer[64]; + snprintf(numBuffer, sizeof(numBuffer), isFloat ? "%.7g" : "%.14g", + data[rowId]); + writeString(buffer, numBuffer); + } + } + + Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + data(nullptr), + scale(0) { + // PASS + } + + void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const Decimal64VectorBatch&>(batch).values.data(); + scale = dynamic_cast<const Decimal64VectorBatch&>(batch).scale; + } + + std::string toDecimalString(int64_t value, int32_t scale) { + std::stringstream buffer; + if (scale == 0) { + buffer << value; + return buffer.str(); + } + std::string sign = ""; + if (value < 0) { + sign = "-"; + value = -value; + } + buffer << value; + std::string str = buffer.str(); + int32_t len = static_cast<int32_t>(str.length()); + if (len > scale) { + return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." + + str.substr(static_cast<size_t>(len - scale), + static_cast<size_t>(scale)); + } else if (len == scale) { + return sign + "0." 
+ str; + } else { + std::string result = sign + "0."; + for(int32_t i=0; i < scale - len; ++i) { + result += "0"; + } + return result + str; + } + } + + void Decimal64ColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeString(buffer, toDecimalString(data[rowId], scale).c_str()); + } + } + + Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + data(nullptr), + scale(0) { + // PASS + } + + void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data(); + scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale; + } + + void Decimal128ColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeString(buffer, data[rowId].toDecimalString(scale).c_str()); + } + } + + StringColumnPrinter::StringColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + start(nullptr), + length(nullptr) { + // PASS + } + + void StringColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + start = dynamic_cast<const StringVectorBatch&>(batch).data.data(); + length = dynamic_cast<const StringVectorBatch&>(batch).length.data(); + } + + void StringColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeChar(buffer, '"'); + for(int64_t i=0; i < length[rowId]; ++i) { + char ch = static_cast<char>(start[rowId][i]); + switch (ch) { + case '\\': + writeString(buffer, "\\\\"); + break; + case '\b': + writeString(buffer, "\\b"); + break; + case '\f': + writeString(buffer, "\\f"); + break; + case '\n': + writeString(buffer, "\\n"); + break; + case '\r': + writeString(buffer, "\\r"); + break; + case '\t': + writeString(buffer, "\\t"); + break; + case '"': + writeString(buffer, "\\\""); + break; + default: + writeChar(buffer, ch); + break; + } + } + writeChar(buffer, '"'); + } + } + + ListColumnPrinter::ListColumnPrinter(std::string& _buffer, + const Type& type + ): ColumnPrinter(_buffer), + offsets(nullptr) { + elementPrinter = createColumnPrinter(buffer, type.getSubtype(0)); + } + + void ListColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + offsets = dynamic_cast<const ListVectorBatch&>(batch).offsets.data(); + elementPrinter->reset(*dynamic_cast<const ListVectorBatch&>(batch). 
+ elements); + } + + void ListColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeChar(buffer, '['); + for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) { + if (i != offsets[rowId]) { + writeString(buffer, ", "); + } + elementPrinter->printRow(static_cast<uint64_t>(i)); + } + writeChar(buffer, ']'); + } + } + + MapColumnPrinter::MapColumnPrinter(std::string& _buffer, + const Type& type + ): ColumnPrinter(_buffer), + offsets(nullptr) { + keyPrinter = createColumnPrinter(buffer, type.getSubtype(0)); + elementPrinter = createColumnPrinter(buffer, type.getSubtype(1)); + } + + void MapColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + const MapVectorBatch& myBatch = dynamic_cast<const MapVectorBatch&>(batch); + offsets = myBatch.offsets.data(); + keyPrinter->reset(*myBatch.keys); + elementPrinter->reset(*myBatch.elements); + } + + void MapColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeChar(buffer, '['); + for(int64_t i=offsets[rowId]; i < offsets[rowId+1]; ++i) { + if (i != offsets[rowId]) { + writeString(buffer, ", "); + } + writeString(buffer, "{\"key\": "); + keyPrinter->printRow(static_cast<uint64_t>(i)); + writeString(buffer, ", \"value\": "); + elementPrinter->printRow(static_cast<uint64_t>(i)); + writeChar(buffer, '}'); + } + writeChar(buffer, ']'); + } + } + + UnionColumnPrinter::UnionColumnPrinter(std::string& _buffer, + const Type& type + ): ColumnPrinter(_buffer), + tags(nullptr), + offsets(nullptr) { + for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { + fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i)) + .release()); + } + } + + UnionColumnPrinter::~UnionColumnPrinter() { + for (size_t i = 0; i < fieldPrinter.size(); i++) { + delete fieldPrinter[i]; + } + } + + void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + const UnionVectorBatch& unionBatch = + dynamic_cast<const UnionVectorBatch&>(batch); + tags = unionBatch.tags.data(); + offsets = unionBatch.offsets.data(); + for(size_t i=0; i < fieldPrinter.size(); ++i) { + fieldPrinter[i]->reset(*(unionBatch.children[i])); + } + } + + void UnionColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeString(buffer, "{\"tag\": "); + char numBuffer[64]; + snprintf(numBuffer, sizeof(numBuffer), "%" INT64_FORMAT_STRING "d", + static_cast<int64_t>(tags[rowId])); + writeString(buffer, numBuffer); + writeString(buffer, ", \"value\": "); + fieldPrinter[tags[rowId]]->printRow(offsets[rowId]); + writeChar(buffer, '}'); + } + } + + StructColumnPrinter::StructColumnPrinter(std::string& _buffer, + const Type& type + ): ColumnPrinter(_buffer) { + for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { + fieldNames.push_back(type.getFieldName(i)); + fieldPrinter.push_back(createColumnPrinter(buffer, + type.getSubtype(i)) + .release()); + } + } + + StructColumnPrinter::~StructColumnPrinter() { + for (size_t i = 0; i < fieldPrinter.size(); i++) { + delete fieldPrinter[i]; + } + } + + void StructColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + const StructVectorBatch& structBatch = + dynamic_cast<const StructVectorBatch&>(batch); + for(size_t i=0; i < fieldPrinter.size(); ++i) { + fieldPrinter[i]->reset(*(structBatch.fields[i])); + } + } + + void 
StructColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeChar(buffer, '{'); + for(unsigned int i=0; i < fieldPrinter.size(); ++i) { + if (i != 0) { + writeString(buffer, ", "); + } + writeChar(buffer, '"'); + writeString(buffer, fieldNames[i].c_str()); + writeString(buffer, "\": "); + fieldPrinter[i]->printRow(rowId); + } + writeChar(buffer, '}'); + } + } + + DateColumnPrinter::DateColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + data(nullptr) { + // PASS + } + + void DateColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + const time_t timeValue = data[rowId] * 24 * 60 * 60; + struct tm tmValue; + gmtime_r(&timeValue, &tmValue); + char timeBuffer[11]; + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d", &tmValue); + writeChar(buffer, '"'); + writeString(buffer, timeBuffer); + writeChar(buffer, '"'); + } + } + + void DateColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); + } + + BooleanColumnPrinter::BooleanColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + data(nullptr) { + // PASS + } + + void BooleanColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeString(buffer, (data[rowId] ? "true" : "false")); + } + } + + void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + data = dynamic_cast<const LongVectorBatch&>(batch).data.data(); + } + + BinaryColumnPrinter::BinaryColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + start(nullptr), + length(nullptr) { + // PASS + } + + void BinaryColumnPrinter::printRow(uint64_t rowId) { + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + writeChar(buffer, '['); + for(int64_t i=0; i < length[rowId]; ++i) { + if (i != 0) { + writeString(buffer, ", "); + } + char numBuffer[64]; + snprintf(numBuffer, sizeof(numBuffer), "%d", + (static_cast<const int>(start[rowId][i]) & 0xff)); + writeString(buffer, numBuffer); + } + writeChar(buffer, ']'); + } + } + + void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + start = dynamic_cast<const StringVectorBatch&>(batch).data.data(); + length = dynamic_cast<const StringVectorBatch&>(batch).length.data(); + } + + TimestampColumnPrinter::TimestampColumnPrinter(std::string& _buffer + ): ColumnPrinter(_buffer), + seconds(nullptr), + nanoseconds(nullptr) { + // PASS + } + + void TimestampColumnPrinter::printRow(uint64_t rowId) { + const int64_t NANO_DIGITS = 9; + if (hasNulls && !notNull[rowId]) { + writeString(buffer, "null"); + } else { + int64_t nanos = nanoseconds[rowId]; + time_t secs = static_cast<time_t>(seconds[rowId]); + struct tm tmValue; + gmtime_r(&secs, &tmValue); + char timeBuffer[20]; + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + writeChar(buffer, '"'); + writeString(buffer, timeBuffer); + writeChar(buffer, '.'); + // remove trailing zeros off the back of the nanos value. 
+ int64_t zeroDigits = 0; + if (nanos == 0) { + zeroDigits = 8; + } else { + while (nanos % 10 == 0) { + nanos /= 10; + zeroDigits += 1; + } + } + char numBuffer[64]; + snprintf(numBuffer, sizeof(numBuffer), + "%0*" INT64_FORMAT_STRING "d\"", + static_cast<int>(NANO_DIGITS - zeroDigits), + static_cast<int64_t >(nanos)); + writeString(buffer, numBuffer); + } + } + + void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) { + ColumnPrinter::reset(batch); + const TimestampVectorBatch& ts = + dynamic_cast<const TimestampVectorBatch&>(batch); + seconds = ts.data.data(); + nanoseconds = ts.nanoseconds.data(); + } +} diff --git a/contrib/libs/apache/orc/c++/src/ColumnReader.cc b/contrib/libs/apache/orc/c++/src/ColumnReader.cc index aa891f5074..8cf660be11 100644 --- a/contrib/libs/apache/orc/c++/src/ColumnReader.cc +++ b/contrib/libs/apache/orc/c++/src/ColumnReader.cc @@ -1,1836 +1,1836 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Int128.hh" - -#include "Adaptor.hh" -#include "ByteRLE.hh" -#include "ColumnReader.hh" -#include "orc/Exceptions.hh" -#include "RLE.hh" - -#include <math.h> -#include <iostream> - -namespace orc { - - StripeStreams::~StripeStreams() { - // PASS - } - - inline RleVersion convertRleVersion(proto::ColumnEncoding_Kind kind) { - switch (static_cast<int64_t>(kind)) { - case proto::ColumnEncoding_Kind_DIRECT: - case proto::ColumnEncoding_Kind_DICTIONARY: - return RleVersion_1; - case proto::ColumnEncoding_Kind_DIRECT_V2: - case proto::ColumnEncoding_Kind_DICTIONARY_V2: - return RleVersion_2; - default: - throw ParseError("Unknown encoding in convertRleVersion"); - } - } - - ColumnReader::ColumnReader(const Type& type, - StripeStreams& stripe - ): columnId(type.getColumnId()), - memoryPool(stripe.getMemoryPool()) { - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true); - if (stream.get()) { - notNullDecoder = createBooleanRleDecoder(std::move(stream)); - } - } - - ColumnReader::~ColumnReader() { - // PASS - } - - uint64_t ColumnReader::skip(uint64_t numValues) { - ByteRleDecoder* decoder = notNullDecoder.get(); - if (decoder) { - // page through the values that we want to skip - // and count how many are non-null - const size_t MAX_BUFFER_SIZE = 32768; - size_t bufferSize = std::min(MAX_BUFFER_SIZE, - static_cast<size_t>(numValues)); - char buffer[MAX_BUFFER_SIZE]; - uint64_t remaining = numValues; - while (remaining > 0) { - uint64_t chunkSize = - std::min(remaining, - static_cast<uint64_t>(bufferSize)); - decoder->next(buffer, chunkSize, nullptr); - remaining -= chunkSize; - for(uint64_t i=0; i < chunkSize; ++i) { - if (!buffer[i]) { - numValues -= 1; - } - } - } - } - return numValues; - } - - void 
ColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* incomingMask) { - if (numValues > rowBatch.capacity) { - rowBatch.resize(numValues); - } - rowBatch.numElements = numValues; - ByteRleDecoder* decoder = notNullDecoder.get(); - if (decoder) { - char* notNullArray = rowBatch.notNull.data(); - decoder->next(notNullArray, numValues, incomingMask); - // check to see if there are nulls in this batch - for(uint64_t i=0; i < numValues; ++i) { - if (!notNullArray[i]) { - rowBatch.hasNulls = true; - return; - } - } - } else if (incomingMask) { - // If we don't have a notNull stream, copy the incomingMask - rowBatch.hasNulls = true; - memcpy(rowBatch.notNull.data(), incomingMask, numValues); - return; - } - rowBatch.hasNulls = false; - } - - void ColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - if (notNullDecoder.get()) { - notNullDecoder->seek(positions.at(columnId)); - } - } - - /** - * Expand an array of bytes in place to the corresponding array of longs. - * Has to work backwards so that they data isn't clobbered during the - * expansion. - * @param buffer the array of chars and array of longs that need to be - * expanded - * @param numValues the number of bytes to convert to longs - */ - void expandBytesToLongs(int64_t* buffer, uint64_t numValues) { - for(size_t i=numValues - 1; i < numValues; --i) { - buffer[i] = reinterpret_cast<char *>(buffer)[i]; - } - } - - class BooleanColumnReader: public ColumnReader { - private: - std::unique_ptr<orc::ByteRleDecoder> rle; - - public: - BooleanColumnReader(const Type& type, StripeStreams& stipe); - ~BooleanColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - BooleanColumnReader::BooleanColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe){ - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (stream == nullptr) - throw ParseError("DATA stream not found in Boolean column"); - rle = createBooleanRleDecoder(std::move(stream)); - } - - BooleanColumnReader::~BooleanColumnReader() { - // PASS - } - - uint64_t BooleanColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - rle->skip(numValues); - return numValues; - } - - void BooleanColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - // Since the byte rle places the output in a char* instead of long*, - // we cheat here and use the long* and then expand it in a second pass. - int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data(); - rle->next(reinterpret_cast<char*>(ptr), - numValues, rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr); - expandBytesToLongs(ptr, numValues); - } - - void BooleanColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - } - - class ByteColumnReader: public ColumnReader { - private: - std::unique_ptr<orc::ByteRleDecoder> rle; - - public: - ByteColumnReader(const Type& type, StripeStreams& stipe); - ~ByteColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - ByteColumnReader::ByteColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe){ - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (stream == nullptr) - throw ParseError("DATA stream not found in Byte column"); - rle = createByteRleDecoder(std::move(stream)); - } - - ByteColumnReader::~ByteColumnReader() { - // PASS - } - - uint64_t ByteColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - rle->skip(numValues); - return numValues; - } - - void ByteColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - // Since the byte rle places the output in a char* instead of long*, - // we cheat here and use the long* and then expand it in a second pass. - int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data(); - rle->next(reinterpret_cast<char*>(ptr), - numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr); - expandBytesToLongs(ptr, numValues); - } - - void ByteColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - } - - class IntegerColumnReader: public ColumnReader { - protected: - std::unique_ptr<orc::RleDecoder> rle; - - public: - IntegerColumnReader(const Type& type, StripeStreams& stripe); - ~IntegerColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - IntegerColumnReader::IntegerColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (stream == nullptr) - throw ParseError("DATA stream not found in Integer column"); - rle = createRleDecoder(std::move(stream), true, vers, memoryPool); - } - - IntegerColumnReader::~IntegerColumnReader() { - // PASS - } - - uint64_t IntegerColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - rle->skip(numValues); - return numValues; - } - - void IntegerColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - rle->next(dynamic_cast<LongVectorBatch&>(rowBatch).data.data(), - numValues, rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr); - } - - void IntegerColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - } - - class TimestampColumnReader: public ColumnReader { - private: - std::unique_ptr<orc::RleDecoder> secondsRle; - std::unique_ptr<orc::RleDecoder> nanoRle; - const Timezone& writerTimezone; - const int64_t epochOffset; - - public: - TimestampColumnReader(const Type& type, StripeStreams& stripe); - ~TimestampColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - - TimestampColumnReader::TimestampColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe), - writerTimezone(stripe.getWriterTimezone()), - epochOffset(writerTimezone.getEpoch()) { - RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (stream == nullptr) - throw ParseError("DATA stream not found in Timestamp column"); - secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool); - stream = stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true); - if (stream == nullptr) - throw ParseError("SECONDARY stream not found in Timestamp column"); - nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool); - } - - TimestampColumnReader::~TimestampColumnReader() { - // PASS - } - - uint64_t TimestampColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - secondsRle->skip(numValues); - nanoRle->skip(numValues); - return numValues; - } - - void TimestampColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - TimestampVectorBatch& timestampBatch = - dynamic_cast<TimestampVectorBatch&>(rowBatch); - int64_t *secsBuffer = timestampBatch.data.data(); - secondsRle->next(secsBuffer, numValues, notNull); - int64_t *nanoBuffer = timestampBatch.nanoseconds.data(); - nanoRle->next(nanoBuffer, numValues, notNull); - - // Construct the values - for(uint64_t i=0; i < numValues; i++) { - if (notNull == nullptr || notNull[i]) { - uint64_t zeros = nanoBuffer[i] & 0x7; - nanoBuffer[i] >>= 3; - if (zeros != 0) { - for(uint64_t j = 0; j <= zeros; ++j) { - nanoBuffer[i] *= 10; - } - } - int64_t writerTime = secsBuffer[i] + epochOffset; - secsBuffer[i] = writerTimezone.convertToUTC(writerTime); +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/Int128.hh" + +#include "Adaptor.hh" +#include "ByteRLE.hh" +#include "ColumnReader.hh" +#include "orc/Exceptions.hh" +#include "RLE.hh" + +#include <math.h> +#include <iostream> + +namespace orc { + + StripeStreams::~StripeStreams() { + // PASS + } + + inline RleVersion convertRleVersion(proto::ColumnEncoding_Kind kind) { + switch (static_cast<int64_t>(kind)) { + case proto::ColumnEncoding_Kind_DIRECT: + case proto::ColumnEncoding_Kind_DICTIONARY: + return RleVersion_1; + case proto::ColumnEncoding_Kind_DIRECT_V2: + case proto::ColumnEncoding_Kind_DICTIONARY_V2: + return RleVersion_2; + default: + throw ParseError("Unknown encoding in convertRleVersion"); + } + } + + ColumnReader::ColumnReader(const Type& type, + StripeStreams& stripe + ): columnId(type.getColumnId()), + memoryPool(stripe.getMemoryPool()) { + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true); + if (stream.get()) { + notNullDecoder = createBooleanRleDecoder(std::move(stream)); + } + } + + ColumnReader::~ColumnReader() { + // PASS + } + + uint64_t ColumnReader::skip(uint64_t numValues) { + ByteRleDecoder* decoder = notNullDecoder.get(); + if (decoder) { + // page through the values that we want to skip + // and count how many are non-null + const size_t MAX_BUFFER_SIZE = 32768; + size_t bufferSize = std::min(MAX_BUFFER_SIZE, + static_cast<size_t>(numValues)); + char buffer[MAX_BUFFER_SIZE]; + uint64_t remaining = numValues; + while (remaining > 0) { + uint64_t chunkSize = + std::min(remaining, + static_cast<uint64_t>(bufferSize)); + decoder->next(buffer, chunkSize, nullptr); + remaining -= chunkSize; + for(uint64_t i=0; i < chunkSize; ++i) { + if (!buffer[i]) { + numValues -= 1; + } + } + } + } + return numValues; + } + + void ColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* incomingMask) { + if (numValues > rowBatch.capacity) { + rowBatch.resize(numValues); + } + rowBatch.numElements = numValues; + ByteRleDecoder* decoder = notNullDecoder.get(); + if (decoder) { + char* notNullArray = rowBatch.notNull.data(); + decoder->next(notNullArray, numValues, incomingMask); + // check to see if there are nulls in this batch + for(uint64_t i=0; i < numValues; ++i) { + if (!notNullArray[i]) { + rowBatch.hasNulls = true; + return; + } + } + } else if (incomingMask) { + // If we don't have a notNull stream, copy the incomingMask + rowBatch.hasNulls = true; + memcpy(rowBatch.notNull.data(), incomingMask, numValues); + return; + } + rowBatch.hasNulls = false; + } + + void ColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + if (notNullDecoder.get()) { + notNullDecoder->seek(positions.at(columnId)); + } + } + + /** + * Expand an array of bytes in place to the corresponding array of longs. + * Has to work backwards so that they data isn't clobbered during the + * expansion. 
+ * @param buffer the array of chars and array of longs that need to be + * expanded + * @param numValues the number of bytes to convert to longs + */ + void expandBytesToLongs(int64_t* buffer, uint64_t numValues) { + for(size_t i=numValues - 1; i < numValues; --i) { + buffer[i] = reinterpret_cast<char *>(buffer)[i]; + } + } + + class BooleanColumnReader: public ColumnReader { + private: + std::unique_ptr<orc::ByteRleDecoder> rle; + + public: + BooleanColumnReader(const Type& type, StripeStreams& stipe); + ~BooleanColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + BooleanColumnReader::BooleanColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe){ + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (stream == nullptr) + throw ParseError("DATA stream not found in Boolean column"); + rle = createBooleanRleDecoder(std::move(stream)); + } + + BooleanColumnReader::~BooleanColumnReader() { + // PASS + } + + uint64_t BooleanColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + rle->skip(numValues); + return numValues; + } + + void BooleanColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + // Since the byte rle places the output in a char* instead of long*, + // we cheat here and use the long* and then expand it in a second pass. + int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data(); + rle->next(reinterpret_cast<char*>(ptr), + numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr); + expandBytesToLongs(ptr, numValues); + } + + void BooleanColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + } + + class ByteColumnReader: public ColumnReader { + private: + std::unique_ptr<orc::ByteRleDecoder> rle; + + public: + ByteColumnReader(const Type& type, StripeStreams& stipe); + ~ByteColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + ByteColumnReader::ByteColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe){ + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (stream == nullptr) + throw ParseError("DATA stream not found in Byte column"); + rle = createByteRleDecoder(std::move(stream)); + } + + ByteColumnReader::~ByteColumnReader() { + // PASS + } + + uint64_t ByteColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + rle->skip(numValues); + return numValues; + } + + void ByteColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + // Since the byte rle places the output in a char* instead of long*, + // we cheat here and use the long* and then expand it in a second pass. 
+ int64_t *ptr = dynamic_cast<LongVectorBatch&>(rowBatch).data.data(); + rle->next(reinterpret_cast<char*>(ptr), + numValues, rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr); + expandBytesToLongs(ptr, numValues); + } + + void ByteColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + } + + class IntegerColumnReader: public ColumnReader { + protected: + std::unique_ptr<orc::RleDecoder> rle; + + public: + IntegerColumnReader(const Type& type, StripeStreams& stripe); + ~IntegerColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + IntegerColumnReader::IntegerColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (stream == nullptr) + throw ParseError("DATA stream not found in Integer column"); + rle = createRleDecoder(std::move(stream), true, vers, memoryPool); + } + + IntegerColumnReader::~IntegerColumnReader() { + // PASS + } + + uint64_t IntegerColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + rle->skip(numValues); + return numValues; + } + + void IntegerColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + rle->next(dynamic_cast<LongVectorBatch&>(rowBatch).data.data(), + numValues, rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr); + } + + void IntegerColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + } + + class TimestampColumnReader: public ColumnReader { + private: + std::unique_ptr<orc::RleDecoder> secondsRle; + std::unique_ptr<orc::RleDecoder> nanoRle; + const Timezone& writerTimezone; + const int64_t epochOffset; + + public: + TimestampColumnReader(const Type& type, StripeStreams& stripe); + ~TimestampColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + + TimestampColumnReader::TimestampColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe), + writerTimezone(stripe.getWriterTimezone()), + epochOffset(writerTimezone.getEpoch()) { + RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (stream == nullptr) + throw ParseError("DATA stream not found in Timestamp column"); + secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool); + stream = stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true); + if (stream == nullptr) + throw ParseError("SECONDARY stream not found in Timestamp column"); + nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool); + } + + TimestampColumnReader::~TimestampColumnReader() { + // PASS + } + + uint64_t TimestampColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + secondsRle->skip(numValues); + nanoRle->skip(numValues); + return numValues; + } + + void TimestampColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; + TimestampVectorBatch& timestampBatch = + dynamic_cast<TimestampVectorBatch&>(rowBatch); + int64_t *secsBuffer = timestampBatch.data.data(); + secondsRle->next(secsBuffer, numValues, notNull); + int64_t *nanoBuffer = timestampBatch.nanoseconds.data(); + nanoRle->next(nanoBuffer, numValues, notNull); + + // Construct the values + for(uint64_t i=0; i < numValues; i++) { + if (notNull == nullptr || notNull[i]) { + uint64_t zeros = nanoBuffer[i] & 0x7; + nanoBuffer[i] >>= 3; + if (zeros != 0) { + for(uint64_t j = 0; j <= zeros; ++j) { + nanoBuffer[i] *= 10; + } + } + int64_t writerTime = secsBuffer[i] + epochOffset; + secsBuffer[i] = writerTimezone.convertToUTC(writerTime); if (secsBuffer[i] < 0 && nanoBuffer[i] > 999999) { - secsBuffer[i] -= 1; - } - } - } - } - - void TimestampColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - secondsRle->seek(positions.at(columnId)); - nanoRle->seek(positions.at(columnId)); - } - - class DoubleColumnReader: public ColumnReader { - public: - DoubleColumnReader(const Type& type, StripeStreams& stripe); - ~DoubleColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - - private: - std::unique_ptr<SeekableInputStream> inputStream; - TypeKind columnKind; - const uint64_t bytesPerValue ; - const char *bufferPointer; - const char *bufferEnd; - - unsigned char readByte() { - if (bufferPointer == bufferEnd) { - int length; - if (!inputStream->Next - (reinterpret_cast<const void**>(&bufferPointer), &length)) { - throw ParseError("bad read in DoubleColumnReader::next()"); - } - bufferEnd = bufferPointer + length; - } - return static_cast<unsigned char>(*(bufferPointer++)); - } - - double readDouble() { - int64_t bits = 0; - for (uint64_t i=0; i < 8; i++) { - bits |= static_cast<int64_t>(readByte()) << (i*8); - } - double *result = reinterpret_cast<double*>(&bits); - return *result; - } - - double readFloat() { - int32_t bits = 0; - for (uint64_t i=0; i < 4; i++) { - bits |= readByte() << (i*8); - } - float *result = reinterpret_cast<float*>(&bits); - return static_cast<double>(*result); - } - }; - - DoubleColumnReader::DoubleColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe), - columnKind(type.getKind()), - bytesPerValue((type.getKind() == - FLOAT) ? 4 : 8), - bufferPointer(nullptr), - bufferEnd(nullptr) { - inputStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (inputStream == nullptr) - throw ParseError("DATA stream not found in Double column"); - } - - DoubleColumnReader::~DoubleColumnReader() { - // PASS - } - - uint64_t DoubleColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - - if (static_cast<size_t>(bufferEnd - bufferPointer) >= - bytesPerValue * numValues) { - bufferPointer += bytesPerValue * numValues; - } else { - size_t sizeToSkip = bytesPerValue * numValues - - static_cast<size_t>(bufferEnd - bufferPointer); - const size_t cap = static_cast<size_t>(std::numeric_limits<int>::max()); - while (sizeToSkip != 0) { - size_t step = sizeToSkip > cap ? 
cap : sizeToSkip; - inputStream->Skip(static_cast<int>(step)); - sizeToSkip -= step; - } - bufferEnd = nullptr; - bufferPointer = nullptr; - } - - return numValues; - } - - void DoubleColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - // update the notNull from the parent class - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - double* outArray = dynamic_cast<DoubleVectorBatch&>(rowBatch).data.data(); - - if (columnKind == FLOAT) { - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - outArray[i] = readFloat(); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - outArray[i] = readFloat(); - } - } - } else { - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - outArray[i] = readDouble(); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - outArray[i] = readDouble(); - } - } - } - } - - void readFully(char* buffer, int64_t bufferSize, SeekableInputStream* stream) { - int64_t posn = 0; - while (posn < bufferSize) { - const void* chunk; - int length; - if (!stream->Next(&chunk, &length)) { - throw ParseError("bad read in readFully"); - } - if (posn + length > bufferSize) { - throw ParseError("Corrupt dictionary blob in StringDictionaryColumn"); - } - memcpy(buffer + posn, chunk, static_cast<size_t>(length)); - posn += length; - } - } - - void DoubleColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - inputStream->seek(positions.at(columnId)); - } - - class StringDictionaryColumnReader: public ColumnReader { - private: - std::shared_ptr<StringDictionary> dictionary; - std::unique_ptr<RleDecoder> rle; - - public: - StringDictionaryColumnReader(const Type& type, StripeStreams& stipe); - ~StringDictionaryColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - StringDictionaryColumnReader::StringDictionaryColumnReader - (const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe), - dictionary(new StringDictionary(stripe.getMemoryPool())) { - RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId) - .kind()); - uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize(); - rle = createRleDecoder(stripe.getStream(columnId, - proto::Stream_Kind_DATA, - true), - false, rleVersion, memoryPool); - std::unique_ptr<RleDecoder> lengthDecoder = - createRleDecoder(stripe.getStream(columnId, - proto::Stream_Kind_LENGTH, - false), - false, rleVersion, memoryPool); - dictionary->dictionaryOffset.resize(dictSize + 1); - int64_t* lengthArray = dictionary->dictionaryOffset.data(); - lengthDecoder->next(lengthArray + 1, dictSize, nullptr); - lengthArray[0] = 0; - for(uint32_t i = 1; i < dictSize + 1; ++i) { - lengthArray[i] += lengthArray[i - 1]; - } - dictionary->dictionaryBlob.resize( - static_cast<uint64_t>(lengthArray[dictSize])); - std::unique_ptr<SeekableInputStream> blobStream = - stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false); - readFully( - dictionary->dictionaryBlob.data(), - lengthArray[dictSize], - blobStream.get()); - } - - 
StringDictionaryColumnReader::~StringDictionaryColumnReader() { - // PASS - } - - uint64_t StringDictionaryColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - rle->skip(numValues); - return numValues; - } - - void StringDictionaryColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - // update the notNull from the parent class - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch); - char *blob = dictionary->dictionaryBlob.data(); - int64_t *dictionaryOffsets = dictionary->dictionaryOffset.data(); - char **outputStarts = byteBatch.data.data(); - int64_t *outputLengths = byteBatch.length.data(); - rle->next(outputLengths, numValues, notNull); - uint64_t dictionaryCount = dictionary->dictionaryOffset.size() - 1; - if (notNull) { - for(uint64_t i=0; i < numValues; ++i) { - if (notNull[i]) { - int64_t entry = outputLengths[i]; - if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount ) { - throw ParseError("Entry index out of range in StringDictionaryColumn"); - } - outputStarts[i] = blob + dictionaryOffsets[entry]; - outputLengths[i] = dictionaryOffsets[entry+1] - - dictionaryOffsets[entry]; - } - } - } else { - for(uint64_t i=0; i < numValues; ++i) { - int64_t entry = outputLengths[i]; - if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount) { - throw ParseError("Entry index out of range in StringDictionaryColumn"); - } - outputStarts[i] = blob + dictionaryOffsets[entry]; - outputLengths[i] = dictionaryOffsets[entry+1] - - dictionaryOffsets[entry]; - } - } - } - - void StringDictionaryColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - rowBatch.isEncoded = true; - - EncodedStringVectorBatch& batch = dynamic_cast<EncodedStringVectorBatch&>(rowBatch); - batch.dictionary = this->dictionary; - - // Length buffer is reused to save dictionary entry ids - rle->next(batch.index.data(), numValues, notNull); - } - - void StringDictionaryColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - } - - - class StringDirectColumnReader: public ColumnReader { - private: - std::unique_ptr<RleDecoder> lengthRle; - std::unique_ptr<SeekableInputStream> blobStream; - const char *lastBuffer; - size_t lastBufferLength; - - /** - * Compute the total length of the values. 
- * @param lengths the array of lengths - * @param notNull the array of notNull flags - * @param numValues the lengths of the arrays - * @return the total number of bytes for the non-null values - */ - size_t computeSize(const int64_t *lengths, const char *notNull, - uint64_t numValues); - - public: - StringDirectColumnReader(const Type& type, StripeStreams& stipe); - ~StringDirectColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - - StringDirectColumnReader::StringDirectColumnReader - (const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId) - .kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); - if (stream == nullptr) - throw ParseError("LENGTH stream not found in StringDirectColumn"); - lengthRle = createRleDecoder( - std::move(stream), false, rleVersion, memoryPool); - blobStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (blobStream == nullptr) - throw ParseError("DATA stream not found in StringDirectColumn"); - lastBuffer = nullptr; - lastBufferLength = 0; - } - - StringDirectColumnReader::~StringDirectColumnReader() { - // PASS - } - - uint64_t StringDirectColumnReader::skip(uint64_t numValues) { - const size_t BUFFER_SIZE = 1024; - numValues = ColumnReader::skip(numValues); - int64_t buffer[BUFFER_SIZE]; - uint64_t done = 0; - size_t totalBytes = 0; - // read the lengths, so we know haw many bytes to skip - while (done < numValues) { - uint64_t step = std::min(BUFFER_SIZE, - static_cast<size_t>(numValues - done)); - lengthRle->next(buffer, step, nullptr); - totalBytes += computeSize(buffer, nullptr, step); - done += step; - } - if (totalBytes <= lastBufferLength) { - // subtract the needed bytes from the ones left over - lastBufferLength -= totalBytes; - lastBuffer += totalBytes; - } else { - // move the stream forward after accounting for the buffered bytes - totalBytes -= lastBufferLength; - const size_t cap = static_cast<size_t>(std::numeric_limits<int>::max()); - while (totalBytes != 0) { - size_t step = totalBytes > cap ? cap : totalBytes; - blobStream->Skip(static_cast<int>(step)); - totalBytes -= step; - } - lastBufferLength = 0; - lastBuffer = nullptr; - } - return numValues; - } - - size_t StringDirectColumnReader::computeSize(const int64_t* lengths, - const char* notNull, - uint64_t numValues) { - size_t totalLength = 0; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - totalLength += static_cast<size_t>(lengths[i]); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - totalLength += static_cast<size_t>(lengths[i]); - } - } - return totalLength; - } - - void StringDirectColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - // update the notNull from the parent class - notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; - StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch); - char **startPtr = byteBatch.data.data(); - int64_t *lengthPtr = byteBatch.length.data(); - - // read the length vector - lengthRle->next(lengthPtr, numValues, notNull); - - // figure out the total length of data we need from the blob stream - const size_t totalLength = computeSize(lengthPtr, notNull, numValues); - - // Load data from the blob stream into our buffer until we have enough - // to get the rest directly out of the stream's buffer. - size_t bytesBuffered = 0; - byteBatch.blob.resize(totalLength); - char *ptr= byteBatch.blob.data(); - while (bytesBuffered + lastBufferLength < totalLength) { - memcpy(ptr + bytesBuffered, lastBuffer, lastBufferLength); - bytesBuffered += lastBufferLength; - const void* readBuffer; - int readLength; - if (!blobStream->Next(&readBuffer, &readLength)) { - throw ParseError("failed to read in StringDirectColumnReader.next"); - } - lastBuffer = static_cast<const char*>(readBuffer); - lastBufferLength = static_cast<size_t>(readLength); - } - - if (bytesBuffered < totalLength) { - size_t moreBytes = totalLength - bytesBuffered; - memcpy(ptr + bytesBuffered, lastBuffer, moreBytes); - lastBuffer += moreBytes; - lastBufferLength -= moreBytes; - } - - size_t filledSlots = 0; - ptr = byteBatch.blob.data(); - if (notNull) { - while (filledSlots < numValues) { - if (notNull[filledSlots]) { - startPtr[filledSlots] = const_cast<char*>(ptr); - ptr += lengthPtr[filledSlots]; - } - filledSlots += 1; - } - } else { - while (filledSlots < numValues) { - startPtr[filledSlots] = const_cast<char*>(ptr); - ptr += lengthPtr[filledSlots]; - filledSlots += 1; - } - } - } - - void StringDirectColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - blobStream->seek(positions.at(columnId)); - lengthRle->seek(positions.at(columnId)); - } - - class StructColumnReader: public ColumnReader { - private: - std::vector<ColumnReader*> children; - - public: - StructColumnReader(const Type& type, StripeStreams& stipe); - ~StructColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - - private: - template<bool encoded> - void nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull); - }; - - StructColumnReader::StructColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - // count the number of selected sub-columns - const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); - switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) { - case proto::ColumnEncoding_Kind_DIRECT: - for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { - const Type& child = *type.getSubtype(i); - if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) { - children.push_back(buildReader(child, stripe).release()); - } - } - break; - case proto::ColumnEncoding_Kind_DIRECT_V2: - case proto::ColumnEncoding_Kind_DICTIONARY: - case proto::ColumnEncoding_Kind_DICTIONARY_V2: - default: - throw ParseError("Unknown encoding for StructColumnReader"); - } - } - - StructColumnReader::~StructColumnReader() { - for (size_t i=0; 
i<children.size(); i++) { - delete children[i]; - } - } - - uint64_t StructColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - for(std::vector<ColumnReader*>::iterator ptr=children.begin(); ptr != children.end(); ++ptr) { - (*ptr)->skip(numValues); - } - return numValues; - } - - void StructColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<false>(rowBatch, numValues, notNull); - } - - void StructColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<true>(rowBatch, numValues, notNull); - } - - template<bool encoded> - void StructColumnReader::nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - uint64_t i=0; - notNull = rowBatch.hasNulls? rowBatch.notNull.data() : nullptr; - for(std::vector<ColumnReader*>::iterator ptr=children.begin(); - ptr != children.end(); ++ptr, ++i) { - if (encoded) { - (*ptr)->nextEncoded(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), - numValues, notNull); - } else { - (*ptr)->next(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), - numValues, notNull); - } - } - } - - void StructColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - - for(std::vector<ColumnReader*>::iterator ptr = children.begin(); - ptr != children.end(); - ++ptr) { - (*ptr)->seekToRowGroup(positions); - } - } - - class ListColumnReader: public ColumnReader { - private: - std::unique_ptr<ColumnReader> child; - std::unique_ptr<RleDecoder> rle; - - public: - ListColumnReader(const Type& type, StripeStreams& stipe); - ~ListColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - - private: - template<bool encoded> - void nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull); - }; - - ListColumnReader::ListColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - // count the number of selected sub-columns - const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); - RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); - if (stream == nullptr) - throw ParseError("LENGTH stream not found in List column"); - rle = createRleDecoder(std::move(stream), false, vers, memoryPool); - const Type& childType = *type.getSubtype(0); - if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) { - child = buildReader(childType, stripe); - } - } - - ListColumnReader::~ListColumnReader() { - // PASS - } - - uint64_t ListColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - ColumnReader *childReader = child.get(); - if (childReader) { - const uint64_t BUFFER_SIZE = 1024; - int64_t buffer[BUFFER_SIZE]; - uint64_t childrenElements = 0; - uint64_t lengthsRead = 0; - while (lengthsRead < numValues) { - uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); - for(size_t i=0; i < chunk; ++i) { - 
childrenElements += static_cast<size_t>(buffer[i]); - } - lengthsRead += chunk; - } - childReader->skip(childrenElements); - } else { - rle->skip(numValues); - } - return numValues; - } - - void ListColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<false>(rowBatch, numValues, notNull); - } - - void ListColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<true>(rowBatch, numValues, notNull); - } - - template<bool encoded> - void ListColumnReader::nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - ListVectorBatch &listBatch = dynamic_cast<ListVectorBatch&>(rowBatch); - int64_t* offsets = listBatch.offsets.data(); - notNull = listBatch.hasNulls ? listBatch.notNull.data() : nullptr; - rle->next(offsets, numValues, notNull); - uint64_t totalChildren = 0; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - uint64_t tmp = static_cast<uint64_t>(offsets[i]); - offsets[i] = static_cast<int64_t>(totalChildren); - totalChildren += tmp; - } else { - offsets[i] = static_cast<int64_t>(totalChildren); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - uint64_t tmp = static_cast<uint64_t>(offsets[i]); - offsets[i] = static_cast<int64_t>(totalChildren); - totalChildren += tmp; - } - } - offsets[numValues] = static_cast<int64_t>(totalChildren); - ColumnReader *childReader = child.get(); - if (childReader) { - if (encoded) { - childReader->nextEncoded(*(listBatch.elements.get()), totalChildren, nullptr); - } else { - childReader->next(*(listBatch.elements.get()), totalChildren, nullptr); - } - } - } - - void ListColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - if (child.get()) { - child->seekToRowGroup(positions); - } - } - - class MapColumnReader: public ColumnReader { - private: - std::unique_ptr<ColumnReader> keyReader; - std::unique_ptr<ColumnReader> elementReader; - std::unique_ptr<RleDecoder> rle; - - public: - MapColumnReader(const Type& type, StripeStreams& stipe); - ~MapColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - - private: - template<bool encoded> - void nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull); - }; - - MapColumnReader::MapColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - // Determine if the key and/or value columns are selected - const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); - RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); - if (stream == nullptr) - throw ParseError("LENGTH stream not found in Map column"); - rle = createRleDecoder(std::move(stream), false, vers, memoryPool); - const Type& keyType = *type.getSubtype(0); - if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) { - keyReader = buildReader(keyType, stripe); - } - const Type& elementType = *type.getSubtype(1); - if 
(selectedColumns[static_cast<uint64_t>(elementType.getColumnId())]) { - elementReader = buildReader(elementType, stripe); - } - } - - MapColumnReader::~MapColumnReader() { - // PASS - } - - uint64_t MapColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - ColumnReader *rawKeyReader = keyReader.get(); - ColumnReader *rawElementReader = elementReader.get(); - if (rawKeyReader || rawElementReader) { - const uint64_t BUFFER_SIZE = 1024; - int64_t buffer[BUFFER_SIZE]; - uint64_t childrenElements = 0; - uint64_t lengthsRead = 0; - while (lengthsRead < numValues) { - uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); - for(size_t i=0; i < chunk; ++i) { - childrenElements += static_cast<size_t>(buffer[i]); - } - lengthsRead += chunk; - } - if (rawKeyReader) { - rawKeyReader->skip(childrenElements); - } - if (rawElementReader) { - rawElementReader->skip(childrenElements); - } - } else { - rle->skip(numValues); - } - return numValues; - } - - void MapColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) - { - nextInternal<false>(rowBatch, numValues, notNull); - } - - void MapColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) - { - nextInternal<true>(rowBatch, numValues, notNull); - } - - template<bool encoded> - void MapColumnReader::nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - MapVectorBatch &mapBatch = dynamic_cast<MapVectorBatch&>(rowBatch); - int64_t* offsets = mapBatch.offsets.data(); - notNull = mapBatch.hasNulls ? mapBatch.notNull.data() : nullptr; - rle->next(offsets, numValues, notNull); - uint64_t totalChildren = 0; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - uint64_t tmp = static_cast<uint64_t>(offsets[i]); - offsets[i] = static_cast<int64_t>(totalChildren); - totalChildren += tmp; - } else { - offsets[i] = static_cast<int64_t>(totalChildren); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - uint64_t tmp = static_cast<uint64_t>(offsets[i]); - offsets[i] = static_cast<int64_t>(totalChildren); - totalChildren += tmp; - } - } - offsets[numValues] = static_cast<int64_t>(totalChildren); - ColumnReader *rawKeyReader = keyReader.get(); - if (rawKeyReader) { - if (encoded) { - rawKeyReader->nextEncoded(*(mapBatch.keys.get()), totalChildren, nullptr); - } else { - rawKeyReader->next(*(mapBatch.keys.get()), totalChildren, nullptr); - } - } - ColumnReader *rawElementReader = elementReader.get(); - if (rawElementReader) { - if (encoded) { - rawElementReader->nextEncoded(*(mapBatch.elements.get()), totalChildren, nullptr); - } else { - rawElementReader->next(*(mapBatch.elements.get()), totalChildren, nullptr); - } - } - } - - void MapColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - if (keyReader.get()) { - keyReader->seekToRowGroup(positions); - } - if (elementReader.get()) { - elementReader->seekToRowGroup(positions); - } - } - - class UnionColumnReader: public ColumnReader { - private: - std::unique_ptr<ByteRleDecoder> rle; - std::vector<ColumnReader*> childrenReader; - std::vector<int64_t> childrenCounts; - uint64_t numChildren; - - public: - UnionColumnReader(const Type& type, StripeStreams& stipe); - ~UnionColumnReader() override; - - uint64_t skip(uint64_t numValues) override; 
- - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - - private: - template<bool encoded> - void nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull); - }; - - UnionColumnReader::UnionColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - numChildren = type.getSubtypeCount(); - childrenReader.resize(numChildren); - childrenCounts.resize(numChildren); - - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (stream == nullptr) - throw ParseError("LENGTH stream not found in Union column"); - rle = createByteRleDecoder(std::move(stream)); - // figure out which types are selected - const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); - for(unsigned int i=0; i < numChildren; ++i) { - const Type &child = *type.getSubtype(i); - if (selectedColumns[static_cast<size_t>(child.getColumnId())]) { - childrenReader[i] = buildReader(child, stripe).release(); - } - } - } - - UnionColumnReader::~UnionColumnReader() { - for(std::vector<ColumnReader*>::iterator itr = childrenReader.begin(); - itr != childrenReader.end(); ++itr) { - delete *itr; - } - } - - uint64_t UnionColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - const uint64_t BUFFER_SIZE = 1024; - char buffer[BUFFER_SIZE]; - uint64_t lengthsRead = 0; - int64_t *counts = childrenCounts.data(); - memset(counts, 0, sizeof(int64_t) * numChildren); - while (lengthsRead < numValues) { - uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); - for(size_t i=0; i < chunk; ++i) { - counts[static_cast<size_t>(buffer[i])] += 1; - } - lengthsRead += chunk; - } - for(size_t i=0; i < numChildren; ++i) { - if (counts[i] != 0 && childrenReader[i] != nullptr) { - childrenReader[i]->skip(static_cast<uint64_t>(counts[i])); - } - } - return numValues; - } - - void UnionColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<false>(rowBatch, numValues, notNull); - } - - void UnionColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - nextInternal<true>(rowBatch, numValues, notNull); - } - - template<bool encoded> - void UnionColumnReader::nextInternal(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - UnionVectorBatch &unionBatch = dynamic_cast<UnionVectorBatch&>(rowBatch); - uint64_t* offsets = unionBatch.offsets.data(); - int64_t* counts = childrenCounts.data(); - memset(counts, 0, sizeof(int64_t) * numChildren); - unsigned char* tags = unionBatch.tags.data(); - notNull = unionBatch.hasNulls ? 
unionBatch.notNull.data() : nullptr; - rle->next(reinterpret_cast<char *>(tags), numValues, notNull); - // set the offsets for each row - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - offsets[i] = - static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - offsets[i] = - static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++); - } - } - // read the right number of each child column - for(size_t i=0; i < numChildren; ++i) { - if (childrenReader[i] != nullptr) { - if (encoded) { - childrenReader[i]->nextEncoded(*(unionBatch.children[i]), - static_cast<uint64_t>(counts[i]), nullptr); - } else { - childrenReader[i]->next(*(unionBatch.children[i]), - static_cast<uint64_t>(counts[i]), nullptr); - } - } - } - } - - void UnionColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - for(size_t i = 0; i < numChildren; ++i) { - if (childrenReader[i] != nullptr) { - childrenReader[i]->seekToRowGroup(positions); - } - } - } - - /** - * Destructively convert the number from zigzag encoding to the - * natural signed representation. - */ - void unZigZagInt128(Int128& value) { - bool needsNegate = value.getLowBits() & 1; - value >>= 1; - if (needsNegate) { - value.negate(); - value -= 1; - } - } - - class Decimal64ColumnReader: public ColumnReader { - public: - static const uint32_t MAX_PRECISION_64 = 18; - static const uint32_t MAX_PRECISION_128 = 38; - static const int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1]; - - protected: - std::unique_ptr<SeekableInputStream> valueStream; - int32_t precision; - int32_t scale; - const char* buffer; - const char* bufferEnd; - - std::unique_ptr<RleDecoder> scaleDecoder; - - /** - * Read the valueStream for more bytes. 
- */ - void readBuffer() { - while (buffer == bufferEnd) { - int length; - if (!valueStream->Next(reinterpret_cast<const void**>(&buffer), - &length)) { - throw ParseError("Read past end of stream in Decimal64ColumnReader "+ - valueStream->getName()); - } - bufferEnd = buffer + length; - } - } - - void readInt64(int64_t& value, int32_t currentScale) { - value = 0; - size_t offset = 0; - while (true) { - readBuffer(); - unsigned char ch = static_cast<unsigned char>(*(buffer++)); - value |= static_cast<uint64_t>(ch & 0x7f) << offset; - offset += 7; - if (!(ch & 0x80)) { - break; - } - } - value = unZigZag(static_cast<uint64_t>(value)); - if (scale > currentScale && - static_cast<uint64_t>(scale - currentScale) <= MAX_PRECISION_64) { - value *= POWERS_OF_TEN[scale - currentScale]; - } else if (scale < currentScale && - static_cast<uint64_t>(currentScale - scale) <= MAX_PRECISION_64) { - value /= POWERS_OF_TEN[currentScale - scale]; - } else if (scale != currentScale) { - throw ParseError("Decimal scale out of range"); - } - } - - public: - Decimal64ColumnReader(const Type& type, StripeStreams& stipe); - ~Decimal64ColumnReader() override; - - uint64_t skip(uint64_t numValues) override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) override; - }; - const uint32_t Decimal64ColumnReader::MAX_PRECISION_64; - const uint32_t Decimal64ColumnReader::MAX_PRECISION_128; - const int64_t Decimal64ColumnReader::POWERS_OF_TEN[MAX_PRECISION_64 + 1]= - {1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000, - 100000000000000000, - 1000000000000000000}; - - Decimal64ColumnReader::Decimal64ColumnReader(const Type& type, - StripeStreams& stripe - ): ColumnReader(type, stripe) { - scale = static_cast<int32_t>(type.getScale()); - precision = static_cast<int32_t>(type.getPrecision()); - valueStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (valueStream == nullptr) - throw ParseError("DATA stream not found in Decimal64Column"); - buffer = nullptr; - bufferEnd = nullptr; - RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); - std::unique_ptr<SeekableInputStream> stream = - stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true); - if (stream == nullptr) - throw ParseError("SECONDARY stream not found in Decimal64Column"); - scaleDecoder = createRleDecoder(std::move(stream), true, vers, memoryPool); - } - - Decimal64ColumnReader::~Decimal64ColumnReader() { - // PASS - } - - uint64_t Decimal64ColumnReader::skip(uint64_t numValues) { - numValues = ColumnReader::skip(numValues); - uint64_t skipped = 0; - while (skipped < numValues) { - readBuffer(); - if (!(0x80 & *(buffer++))) { - skipped += 1; - } - } - scaleDecoder->skip(numValues); - return numValues; - } - - void Decimal64ColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; - Decimal64VectorBatch &batch = - dynamic_cast<Decimal64VectorBatch&>(rowBatch); - int64_t* values = batch.values.data(); - // read the next group of scales - int64_t* scaleBuffer = batch.readScales.data(); - scaleDecoder->next(scaleBuffer, numValues, notNull); - batch.precision = precision; - batch.scale = scale; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - readInt64(values[i], static_cast<int32_t>(scaleBuffer[i])); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - readInt64(values[i], static_cast<int32_t>(scaleBuffer[i])); - } - } - } - - void scaleInt128(Int128& value, uint32_t scale, uint32_t currentScale) { - if (scale > currentScale) { - while(scale > currentScale) { - uint32_t scaleAdjust = - std::min(Decimal64ColumnReader::MAX_PRECISION_64, - scale - currentScale); - value *= Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust]; - currentScale += scaleAdjust; - } - } else if (scale < currentScale) { - Int128 remainder; - while(currentScale > scale) { - uint32_t scaleAdjust = - std::min(Decimal64ColumnReader::MAX_PRECISION_64, - currentScale - scale); - value = value.divide(Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust], - remainder); - currentScale -= scaleAdjust; - } - } - } - - void Decimal64ColumnReader::seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions) { - ColumnReader::seekToRowGroup(positions); - valueStream->seek(positions.at(columnId)); - scaleDecoder->seek(positions.at(columnId)); - } - - class Decimal128ColumnReader: public Decimal64ColumnReader { - public: - Decimal128ColumnReader(const Type& type, StripeStreams& stipe); - ~Decimal128ColumnReader() override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - - private: - void readInt128(Int128& value, int32_t currentScale) { - value = 0; - Int128 work; - uint32_t offset = 0; - while (true) { - readBuffer(); - unsigned char ch = static_cast<unsigned char>(*(buffer++)); - work = ch & 0x7f; - work <<= offset; - value |= work; - offset += 7; - if (!(ch & 0x80)) { - break; - } - } - unZigZagInt128(value); - scaleInt128(value, static_cast<uint32_t>(scale), - static_cast<uint32_t>(currentScale)); - } - }; - - Decimal128ColumnReader::Decimal128ColumnReader - (const Type& type, - StripeStreams& stripe - ): Decimal64ColumnReader(type, stripe) { - // PASS - } - - Decimal128ColumnReader::~Decimal128ColumnReader() { - // PASS - } - - void Decimal128ColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - Decimal128VectorBatch &batch = - dynamic_cast<Decimal128VectorBatch&>(rowBatch); - Int128* values = batch.values.data(); - // read the next group of scales - int64_t* scaleBuffer = batch.readScales.data(); - scaleDecoder->next(scaleBuffer, numValues, notNull); - batch.precision = precision; - batch.scale = scale; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - readInt128(values[i], static_cast<int32_t>(scaleBuffer[i])); - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - readInt128(values[i], static_cast<int32_t>(scaleBuffer[i])); - } - } - } - - class DecimalHive11ColumnReader: public Decimal64ColumnReader { - private: - bool throwOnOverflow; - std::ostream* errorStream; - - /** - * Read an Int128 from the stream and correct it to the desired scale. 
- */ - bool readInt128(Int128& value, int32_t currentScale) { - // -/+ 99999999999999999999999999999999999999 - static const Int128 MIN_VALUE(-0x4b3b4ca85a86c47b, 0xf675ddc000000001); - static const Int128 MAX_VALUE( 0x4b3b4ca85a86c47a, 0x098a223fffffffff); - - value = 0; - Int128 work; - uint32_t offset = 0; - bool result = true; - while (true) { - readBuffer(); - unsigned char ch = static_cast<unsigned char>(*(buffer++)); - work = ch & 0x7f; - // If we have read more than 128 bits, we flag the error, but keep - // reading bytes so the stream isn't thrown off. - if (offset > 128 || (offset == 126 && work > 3)) { - result = false; - } - work <<= offset; - value |= work; - offset += 7; - if (!(ch & 0x80)) { - break; - } - } - - if (!result) { - return result; - } - unZigZagInt128(value); - scaleInt128(value, static_cast<uint32_t>(scale), - static_cast<uint32_t>(currentScale)); - return value >= MIN_VALUE && value <= MAX_VALUE; - } - - public: - DecimalHive11ColumnReader(const Type& type, StripeStreams& stipe); - ~DecimalHive11ColumnReader() override; - - void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) override; - }; - - DecimalHive11ColumnReader::DecimalHive11ColumnReader - (const Type& type, - StripeStreams& stripe - ): Decimal64ColumnReader(type, stripe) { - scale = stripe.getForcedScaleOnHive11Decimal(); - throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow(); - errorStream = stripe.getErrorStream(); - } - - DecimalHive11ColumnReader::~DecimalHive11ColumnReader() { - // PASS - } - - void DecimalHive11ColumnReader::next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char *notNull) { - ColumnReader::next(rowBatch, numValues, notNull); - notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; - Decimal128VectorBatch &batch = - dynamic_cast<Decimal128VectorBatch&>(rowBatch); - Int128* values = batch.values.data(); - // read the next group of scales - int64_t* scaleBuffer = batch.readScales.data(); - - scaleDecoder->next(scaleBuffer, numValues, notNull); - - batch.precision = precision; - batch.scale = scale; - if (notNull) { - for(size_t i=0; i < numValues; ++i) { - if (notNull[i]) { - if (!readInt128(values[i], - static_cast<int32_t>(scaleBuffer[i]))) { - if (throwOnOverflow) { - throw ParseError("Hive 0.11 decimal was more than 38 digits."); - } else { - *errorStream << "Warning: " - << "Hive 0.11 decimal with more than 38 digits " - << "replaced by NULL.\n"; - notNull[i] = false; - } - } - } - } - } else { - for(size_t i=0; i < numValues; ++i) { - if (!readInt128(values[i], - static_cast<int32_t>(scaleBuffer[i]))) { - if (throwOnOverflow) { - throw ParseError("Hive 0.11 decimal was more than 38 digits."); - } else { - *errorStream << "Warning: " - << "Hive 0.11 decimal with more than 38 digits " - << "replaced by NULL.\n"; - batch.hasNulls = true; - batch.notNull[i] = false; - } - } - } - } - } - - /** - * Create a reader for the given stripe. 
- */ - std::unique_ptr<ColumnReader> buildReader(const Type& type, - StripeStreams& stripe) { - switch (static_cast<int64_t>(type.getKind())) { - case DATE: - case INT: - case LONG: - case SHORT: - return std::unique_ptr<ColumnReader>( - new IntegerColumnReader(type, stripe)); - case BINARY: - case CHAR: - case STRING: - case VARCHAR: - switch (static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())){ - case proto::ColumnEncoding_Kind_DICTIONARY: - case proto::ColumnEncoding_Kind_DICTIONARY_V2: - return std::unique_ptr<ColumnReader>( - new StringDictionaryColumnReader(type, stripe)); - case proto::ColumnEncoding_Kind_DIRECT: - case proto::ColumnEncoding_Kind_DIRECT_V2: - return std::unique_ptr<ColumnReader>( - new StringDirectColumnReader(type, stripe)); - default: - throw NotImplementedYet("buildReader unhandled string encoding"); - } - - case BOOLEAN: - return std::unique_ptr<ColumnReader>( - new BooleanColumnReader(type, stripe)); - - case BYTE: - return std::unique_ptr<ColumnReader>( - new ByteColumnReader(type, stripe)); - - case LIST: - return std::unique_ptr<ColumnReader>( - new ListColumnReader(type, stripe)); - - case MAP: - return std::unique_ptr<ColumnReader>( - new MapColumnReader(type, stripe)); - - case UNION: - return std::unique_ptr<ColumnReader>( - new UnionColumnReader(type, stripe)); - - case STRUCT: - return std::unique_ptr<ColumnReader>( - new StructColumnReader(type, stripe)); - - case FLOAT: - case DOUBLE: - return std::unique_ptr<ColumnReader>( - new DoubleColumnReader(type, stripe)); - - case TIMESTAMP: - return std::unique_ptr<ColumnReader> - (new TimestampColumnReader(type, stripe)); - - case DECIMAL: - // is this a Hive 0.11 or 0.12 file? - if (type.getPrecision() == 0) { - return std::unique_ptr<ColumnReader> - (new DecimalHive11ColumnReader(type, stripe)); - - // can we represent the values using int64_t? 
- } else if (type.getPrecision() <= - Decimal64ColumnReader::MAX_PRECISION_64) { - return std::unique_ptr<ColumnReader> - (new Decimal64ColumnReader(type, stripe)); - - // otherwise we use the Int128 implementation - } else { - return std::unique_ptr<ColumnReader> - (new Decimal128ColumnReader(type, stripe)); - } - - default: - throw NotImplementedYet("buildReader unhandled type"); - } - } - -} + secsBuffer[i] -= 1; + } + } + } + } + + void TimestampColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + secondsRle->seek(positions.at(columnId)); + nanoRle->seek(positions.at(columnId)); + } + + class DoubleColumnReader: public ColumnReader { + public: + DoubleColumnReader(const Type& type, StripeStreams& stripe); + ~DoubleColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + + private: + std::unique_ptr<SeekableInputStream> inputStream; + TypeKind columnKind; + const uint64_t bytesPerValue ; + const char *bufferPointer; + const char *bufferEnd; + + unsigned char readByte() { + if (bufferPointer == bufferEnd) { + int length; + if (!inputStream->Next + (reinterpret_cast<const void**>(&bufferPointer), &length)) { + throw ParseError("bad read in DoubleColumnReader::next()"); + } + bufferEnd = bufferPointer + length; + } + return static_cast<unsigned char>(*(bufferPointer++)); + } + + double readDouble() { + int64_t bits = 0; + for (uint64_t i=0; i < 8; i++) { + bits |= static_cast<int64_t>(readByte()) << (i*8); + } + double *result = reinterpret_cast<double*>(&bits); + return *result; + } + + double readFloat() { + int32_t bits = 0; + for (uint64_t i=0; i < 4; i++) { + bits |= readByte() << (i*8); + } + float *result = reinterpret_cast<float*>(&bits); + return static_cast<double>(*result); + } + }; + + DoubleColumnReader::DoubleColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe), + columnKind(type.getKind()), + bytesPerValue((type.getKind() == + FLOAT) ? 4 : 8), + bufferPointer(nullptr), + bufferEnd(nullptr) { + inputStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (inputStream == nullptr) + throw ParseError("DATA stream not found in Double column"); + } + + DoubleColumnReader::~DoubleColumnReader() { + // PASS + } + + uint64_t DoubleColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + + if (static_cast<size_t>(bufferEnd - bufferPointer) >= + bytesPerValue * numValues) { + bufferPointer += bytesPerValue * numValues; + } else { + size_t sizeToSkip = bytesPerValue * numValues - + static_cast<size_t>(bufferEnd - bufferPointer); + const size_t cap = static_cast<size_t>(std::numeric_limits<int>::max()); + while (sizeToSkip != 0) { + size_t step = sizeToSkip > cap ? cap : sizeToSkip; + inputStream->Skip(static_cast<int>(step)); + sizeToSkip -= step; + } + bufferEnd = nullptr; + bufferPointer = nullptr; + } + + return numValues; + } + + void DoubleColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + // update the notNull from the parent class + notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; + double* outArray = dynamic_cast<DoubleVectorBatch&>(rowBatch).data.data(); + + if (columnKind == FLOAT) { + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + outArray[i] = readFloat(); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + outArray[i] = readFloat(); + } + } + } else { + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + outArray[i] = readDouble(); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + outArray[i] = readDouble(); + } + } + } + } + + void readFully(char* buffer, int64_t bufferSize, SeekableInputStream* stream) { + int64_t posn = 0; + while (posn < bufferSize) { + const void* chunk; + int length; + if (!stream->Next(&chunk, &length)) { + throw ParseError("bad read in readFully"); + } + if (posn + length > bufferSize) { + throw ParseError("Corrupt dictionary blob in StringDictionaryColumn"); + } + memcpy(buffer + posn, chunk, static_cast<size_t>(length)); + posn += length; + } + } + + void DoubleColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + inputStream->seek(positions.at(columnId)); + } + + class StringDictionaryColumnReader: public ColumnReader { + private: + std::shared_ptr<StringDictionary> dictionary; + std::unique_ptr<RleDecoder> rle; + + public: + StringDictionaryColumnReader(const Type& type, StripeStreams& stipe); + ~StringDictionaryColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + StringDictionaryColumnReader::StringDictionaryColumnReader + (const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe), + dictionary(new StringDictionary(stripe.getMemoryPool())) { + RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId) + .kind()); + uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize(); + rle = createRleDecoder(stripe.getStream(columnId, + proto::Stream_Kind_DATA, + true), + false, rleVersion, memoryPool); + std::unique_ptr<RleDecoder> lengthDecoder = + createRleDecoder(stripe.getStream(columnId, + proto::Stream_Kind_LENGTH, + false), + false, rleVersion, memoryPool); + dictionary->dictionaryOffset.resize(dictSize + 1); + int64_t* lengthArray = dictionary->dictionaryOffset.data(); + lengthDecoder->next(lengthArray + 1, dictSize, nullptr); + lengthArray[0] = 0; + for(uint32_t i = 1; i < dictSize + 1; ++i) { + lengthArray[i] += lengthArray[i - 1]; + } + dictionary->dictionaryBlob.resize( + static_cast<uint64_t>(lengthArray[dictSize])); + std::unique_ptr<SeekableInputStream> blobStream = + stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false); + readFully( + dictionary->dictionaryBlob.data(), + lengthArray[dictSize], + blobStream.get()); + } + + StringDictionaryColumnReader::~StringDictionaryColumnReader() { + // PASS + } + + uint64_t StringDictionaryColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + rle->skip(numValues); + return numValues; + } + + void StringDictionaryColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + // update the notNull from the parent class + 
notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; + StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch); + char *blob = dictionary->dictionaryBlob.data(); + int64_t *dictionaryOffsets = dictionary->dictionaryOffset.data(); + char **outputStarts = byteBatch.data.data(); + int64_t *outputLengths = byteBatch.length.data(); + rle->next(outputLengths, numValues, notNull); + uint64_t dictionaryCount = dictionary->dictionaryOffset.size() - 1; + if (notNull) { + for(uint64_t i=0; i < numValues; ++i) { + if (notNull[i]) { + int64_t entry = outputLengths[i]; + if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount ) { + throw ParseError("Entry index out of range in StringDictionaryColumn"); + } + outputStarts[i] = blob + dictionaryOffsets[entry]; + outputLengths[i] = dictionaryOffsets[entry+1] - + dictionaryOffsets[entry]; + } + } + } else { + for(uint64_t i=0; i < numValues; ++i) { + int64_t entry = outputLengths[i]; + if (entry < 0 || static_cast<uint64_t>(entry) >= dictionaryCount) { + throw ParseError("Entry index out of range in StringDictionaryColumn"); + } + outputStarts[i] = blob + dictionaryOffsets[entry]; + outputLengths[i] = dictionaryOffsets[entry+1] - + dictionaryOffsets[entry]; + } + } + } + + void StringDictionaryColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; + rowBatch.isEncoded = true; + + EncodedStringVectorBatch& batch = dynamic_cast<EncodedStringVectorBatch&>(rowBatch); + batch.dictionary = this->dictionary; + + // Length buffer is reused to save dictionary entry ids + rle->next(batch.index.data(), numValues, notNull); + } + + void StringDictionaryColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + } + + + class StringDirectColumnReader: public ColumnReader { + private: + std::unique_ptr<RleDecoder> lengthRle; + std::unique_ptr<SeekableInputStream> blobStream; + const char *lastBuffer; + size_t lastBufferLength; + + /** + * Compute the total length of the values. 
+ * @param lengths the array of lengths + * @param notNull the array of notNull flags + * @param numValues the lengths of the arrays + * @return the total number of bytes for the non-null values + */ + size_t computeSize(const int64_t *lengths, const char *notNull, + uint64_t numValues); + + public: + StringDirectColumnReader(const Type& type, StripeStreams& stipe); + ~StringDirectColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + + StringDirectColumnReader::StringDirectColumnReader + (const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId) + .kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); + if (stream == nullptr) + throw ParseError("LENGTH stream not found in StringDirectColumn"); + lengthRle = createRleDecoder( + std::move(stream), false, rleVersion, memoryPool); + blobStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (blobStream == nullptr) + throw ParseError("DATA stream not found in StringDirectColumn"); + lastBuffer = nullptr; + lastBufferLength = 0; + } + + StringDirectColumnReader::~StringDirectColumnReader() { + // PASS + } + + uint64_t StringDirectColumnReader::skip(uint64_t numValues) { + const size_t BUFFER_SIZE = 1024; + numValues = ColumnReader::skip(numValues); + int64_t buffer[BUFFER_SIZE]; + uint64_t done = 0; + size_t totalBytes = 0; + // read the lengths, so we know haw many bytes to skip + while (done < numValues) { + uint64_t step = std::min(BUFFER_SIZE, + static_cast<size_t>(numValues - done)); + lengthRle->next(buffer, step, nullptr); + totalBytes += computeSize(buffer, nullptr, step); + done += step; + } + if (totalBytes <= lastBufferLength) { + // subtract the needed bytes from the ones left over + lastBufferLength -= totalBytes; + lastBuffer += totalBytes; + } else { + // move the stream forward after accounting for the buffered bytes + totalBytes -= lastBufferLength; + const size_t cap = static_cast<size_t>(std::numeric_limits<int>::max()); + while (totalBytes != 0) { + size_t step = totalBytes > cap ? cap : totalBytes; + blobStream->Skip(static_cast<int>(step)); + totalBytes -= step; + } + lastBufferLength = 0; + lastBuffer = nullptr; + } + return numValues; + } + + size_t StringDirectColumnReader::computeSize(const int64_t* lengths, + const char* notNull, + uint64_t numValues) { + size_t totalLength = 0; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + totalLength += static_cast<size_t>(lengths[i]); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + totalLength += static_cast<size_t>(lengths[i]); + } + } + return totalLength; + } + + void StringDirectColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + // update the notNull from the parent class + notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; + StringVectorBatch& byteBatch = dynamic_cast<StringVectorBatch&>(rowBatch); + char **startPtr = byteBatch.data.data(); + int64_t *lengthPtr = byteBatch.length.data(); + + // read the length vector + lengthRle->next(lengthPtr, numValues, notNull); + + // figure out the total length of data we need from the blob stream + const size_t totalLength = computeSize(lengthPtr, notNull, numValues); + + // Load data from the blob stream into our buffer until we have enough + // to get the rest directly out of the stream's buffer. + size_t bytesBuffered = 0; + byteBatch.blob.resize(totalLength); + char *ptr= byteBatch.blob.data(); + while (bytesBuffered + lastBufferLength < totalLength) { + memcpy(ptr + bytesBuffered, lastBuffer, lastBufferLength); + bytesBuffered += lastBufferLength; + const void* readBuffer; + int readLength; + if (!blobStream->Next(&readBuffer, &readLength)) { + throw ParseError("failed to read in StringDirectColumnReader.next"); + } + lastBuffer = static_cast<const char*>(readBuffer); + lastBufferLength = static_cast<size_t>(readLength); + } + + if (bytesBuffered < totalLength) { + size_t moreBytes = totalLength - bytesBuffered; + memcpy(ptr + bytesBuffered, lastBuffer, moreBytes); + lastBuffer += moreBytes; + lastBufferLength -= moreBytes; + } + + size_t filledSlots = 0; + ptr = byteBatch.blob.data(); + if (notNull) { + while (filledSlots < numValues) { + if (notNull[filledSlots]) { + startPtr[filledSlots] = const_cast<char*>(ptr); + ptr += lengthPtr[filledSlots]; + } + filledSlots += 1; + } + } else { + while (filledSlots < numValues) { + startPtr[filledSlots] = const_cast<char*>(ptr); + ptr += lengthPtr[filledSlots]; + filledSlots += 1; + } + } + } + + void StringDirectColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + blobStream->seek(positions.at(columnId)); + lengthRle->seek(positions.at(columnId)); + } + + class StructColumnReader: public ColumnReader { + private: + std::vector<ColumnReader*> children; + + public: + StructColumnReader(const Type& type, StripeStreams& stipe); + ~StructColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + + private: + template<bool encoded> + void nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull); + }; + + StructColumnReader::StructColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + // count the number of selected sub-columns + const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); + switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) { + case proto::ColumnEncoding_Kind_DIRECT: + for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { + const Type& child = *type.getSubtype(i); + if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) { + children.push_back(buildReader(child, stripe).release()); + } + } + break; + case proto::ColumnEncoding_Kind_DIRECT_V2: + case proto::ColumnEncoding_Kind_DICTIONARY: + case proto::ColumnEncoding_Kind_DICTIONARY_V2: + default: + throw ParseError("Unknown encoding for StructColumnReader"); + } + } + + StructColumnReader::~StructColumnReader() { + for (size_t i=0; 
i<children.size(); i++) { + delete children[i]; + } + } + + uint64_t StructColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + for(std::vector<ColumnReader*>::iterator ptr=children.begin(); ptr != children.end(); ++ptr) { + (*ptr)->skip(numValues); + } + return numValues; + } + + void StructColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<false>(rowBatch, numValues, notNull); + } + + void StructColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<true>(rowBatch, numValues, notNull); + } + + template<bool encoded> + void StructColumnReader::nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + uint64_t i=0; + notNull = rowBatch.hasNulls? rowBatch.notNull.data() : nullptr; + for(std::vector<ColumnReader*>::iterator ptr=children.begin(); + ptr != children.end(); ++ptr, ++i) { + if (encoded) { + (*ptr)->nextEncoded(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), + numValues, notNull); + } else { + (*ptr)->next(*(dynamic_cast<StructVectorBatch&>(rowBatch).fields[i]), + numValues, notNull); + } + } + } + + void StructColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + + for(std::vector<ColumnReader*>::iterator ptr = children.begin(); + ptr != children.end(); + ++ptr) { + (*ptr)->seekToRowGroup(positions); + } + } + + class ListColumnReader: public ColumnReader { + private: + std::unique_ptr<ColumnReader> child; + std::unique_ptr<RleDecoder> rle; + + public: + ListColumnReader(const Type& type, StripeStreams& stipe); + ~ListColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + + private: + template<bool encoded> + void nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull); + }; + + ListColumnReader::ListColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + // count the number of selected sub-columns + const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); + RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); + if (stream == nullptr) + throw ParseError("LENGTH stream not found in List column"); + rle = createRleDecoder(std::move(stream), false, vers, memoryPool); + const Type& childType = *type.getSubtype(0); + if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) { + child = buildReader(childType, stripe); + } + } + + ListColumnReader::~ListColumnReader() { + // PASS + } + + uint64_t ListColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + ColumnReader *childReader = child.get(); + if (childReader) { + const uint64_t BUFFER_SIZE = 1024; + int64_t buffer[BUFFER_SIZE]; + uint64_t childrenElements = 0; + uint64_t lengthsRead = 0; + while (lengthsRead < numValues) { + uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); + rle->next(buffer, chunk, nullptr); + for(size_t i=0; i < chunk; ++i) { + 
childrenElements += static_cast<size_t>(buffer[i]); + } + lengthsRead += chunk; + } + childReader->skip(childrenElements); + } else { + rle->skip(numValues); + } + return numValues; + } + + void ListColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<false>(rowBatch, numValues, notNull); + } + + void ListColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<true>(rowBatch, numValues, notNull); + } + + template<bool encoded> + void ListColumnReader::nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + ListVectorBatch &listBatch = dynamic_cast<ListVectorBatch&>(rowBatch); + int64_t* offsets = listBatch.offsets.data(); + notNull = listBatch.hasNulls ? listBatch.notNull.data() : nullptr; + rle->next(offsets, numValues, notNull); + uint64_t totalChildren = 0; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + uint64_t tmp = static_cast<uint64_t>(offsets[i]); + offsets[i] = static_cast<int64_t>(totalChildren); + totalChildren += tmp; + } else { + offsets[i] = static_cast<int64_t>(totalChildren); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + uint64_t tmp = static_cast<uint64_t>(offsets[i]); + offsets[i] = static_cast<int64_t>(totalChildren); + totalChildren += tmp; + } + } + offsets[numValues] = static_cast<int64_t>(totalChildren); + ColumnReader *childReader = child.get(); + if (childReader) { + if (encoded) { + childReader->nextEncoded(*(listBatch.elements.get()), totalChildren, nullptr); + } else { + childReader->next(*(listBatch.elements.get()), totalChildren, nullptr); + } + } + } + + void ListColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + if (child.get()) { + child->seekToRowGroup(positions); + } + } + + class MapColumnReader: public ColumnReader { + private: + std::unique_ptr<ColumnReader> keyReader; + std::unique_ptr<ColumnReader> elementReader; + std::unique_ptr<RleDecoder> rle; + + public: + MapColumnReader(const Type& type, StripeStreams& stipe); + ~MapColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + + private: + template<bool encoded> + void nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull); + }; + + MapColumnReader::MapColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + // Determine if the key and/or value columns are selected + const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); + RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); + if (stream == nullptr) + throw ParseError("LENGTH stream not found in Map column"); + rle = createRleDecoder(std::move(stream), false, vers, memoryPool); + const Type& keyType = *type.getSubtype(0); + if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) { + keyReader = buildReader(keyType, stripe); + } + const Type& elementType = *type.getSubtype(1); + if 
(selectedColumns[static_cast<uint64_t>(elementType.getColumnId())]) { + elementReader = buildReader(elementType, stripe); + } + } + + MapColumnReader::~MapColumnReader() { + // PASS + } + + uint64_t MapColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + ColumnReader *rawKeyReader = keyReader.get(); + ColumnReader *rawElementReader = elementReader.get(); + if (rawKeyReader || rawElementReader) { + const uint64_t BUFFER_SIZE = 1024; + int64_t buffer[BUFFER_SIZE]; + uint64_t childrenElements = 0; + uint64_t lengthsRead = 0; + while (lengthsRead < numValues) { + uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); + rle->next(buffer, chunk, nullptr); + for(size_t i=0; i < chunk; ++i) { + childrenElements += static_cast<size_t>(buffer[i]); + } + lengthsRead += chunk; + } + if (rawKeyReader) { + rawKeyReader->skip(childrenElements); + } + if (rawElementReader) { + rawElementReader->skip(childrenElements); + } + } else { + rle->skip(numValues); + } + return numValues; + } + + void MapColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) + { + nextInternal<false>(rowBatch, numValues, notNull); + } + + void MapColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) + { + nextInternal<true>(rowBatch, numValues, notNull); + } + + template<bool encoded> + void MapColumnReader::nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + MapVectorBatch &mapBatch = dynamic_cast<MapVectorBatch&>(rowBatch); + int64_t* offsets = mapBatch.offsets.data(); + notNull = mapBatch.hasNulls ? mapBatch.notNull.data() : nullptr; + rle->next(offsets, numValues, notNull); + uint64_t totalChildren = 0; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + uint64_t tmp = static_cast<uint64_t>(offsets[i]); + offsets[i] = static_cast<int64_t>(totalChildren); + totalChildren += tmp; + } else { + offsets[i] = static_cast<int64_t>(totalChildren); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + uint64_t tmp = static_cast<uint64_t>(offsets[i]); + offsets[i] = static_cast<int64_t>(totalChildren); + totalChildren += tmp; + } + } + offsets[numValues] = static_cast<int64_t>(totalChildren); + ColumnReader *rawKeyReader = keyReader.get(); + if (rawKeyReader) { + if (encoded) { + rawKeyReader->nextEncoded(*(mapBatch.keys.get()), totalChildren, nullptr); + } else { + rawKeyReader->next(*(mapBatch.keys.get()), totalChildren, nullptr); + } + } + ColumnReader *rawElementReader = elementReader.get(); + if (rawElementReader) { + if (encoded) { + rawElementReader->nextEncoded(*(mapBatch.elements.get()), totalChildren, nullptr); + } else { + rawElementReader->next(*(mapBatch.elements.get()), totalChildren, nullptr); + } + } + } + + void MapColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + if (keyReader.get()) { + keyReader->seekToRowGroup(positions); + } + if (elementReader.get()) { + elementReader->seekToRowGroup(positions); + } + } + + class UnionColumnReader: public ColumnReader { + private: + std::unique_ptr<ByteRleDecoder> rle; + std::vector<ColumnReader*> childrenReader; + std::vector<int64_t> childrenCounts; + uint64_t numChildren; + + public: + UnionColumnReader(const Type& type, StripeStreams& stipe); + ~UnionColumnReader() override; + + uint64_t skip(uint64_t numValues) override; 
+ + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + + private: + template<bool encoded> + void nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull); + }; + + UnionColumnReader::UnionColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + numChildren = type.getSubtypeCount(); + childrenReader.resize(numChildren); + childrenCounts.resize(numChildren); + + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (stream == nullptr) + throw ParseError("LENGTH stream not found in Union column"); + rle = createByteRleDecoder(std::move(stream)); + // figure out which types are selected + const std::vector<bool> selectedColumns = stripe.getSelectedColumns(); + for(unsigned int i=0; i < numChildren; ++i) { + const Type &child = *type.getSubtype(i); + if (selectedColumns[static_cast<size_t>(child.getColumnId())]) { + childrenReader[i] = buildReader(child, stripe).release(); + } + } + } + + UnionColumnReader::~UnionColumnReader() { + for(std::vector<ColumnReader*>::iterator itr = childrenReader.begin(); + itr != childrenReader.end(); ++itr) { + delete *itr; + } + } + + uint64_t UnionColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + const uint64_t BUFFER_SIZE = 1024; + char buffer[BUFFER_SIZE]; + uint64_t lengthsRead = 0; + int64_t *counts = childrenCounts.data(); + memset(counts, 0, sizeof(int64_t) * numChildren); + while (lengthsRead < numValues) { + uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); + rle->next(buffer, chunk, nullptr); + for(size_t i=0; i < chunk; ++i) { + counts[static_cast<size_t>(buffer[i])] += 1; + } + lengthsRead += chunk; + } + for(size_t i=0; i < numChildren; ++i) { + if (counts[i] != 0 && childrenReader[i] != nullptr) { + childrenReader[i]->skip(static_cast<uint64_t>(counts[i])); + } + } + return numValues; + } + + void UnionColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<false>(rowBatch, numValues, notNull); + } + + void UnionColumnReader::nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + nextInternal<true>(rowBatch, numValues, notNull); + } + + template<bool encoded> + void UnionColumnReader::nextInternal(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + UnionVectorBatch &unionBatch = dynamic_cast<UnionVectorBatch&>(rowBatch); + uint64_t* offsets = unionBatch.offsets.data(); + int64_t* counts = childrenCounts.data(); + memset(counts, 0, sizeof(int64_t) * numChildren); + unsigned char* tags = unionBatch.tags.data(); + notNull = unionBatch.hasNulls ? 
unionBatch.notNull.data() : nullptr; + rle->next(reinterpret_cast<char *>(tags), numValues, notNull); + // set the offsets for each row + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + offsets[i] = + static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + offsets[i] = + static_cast<uint64_t>(counts[static_cast<size_t>(tags[i])]++); + } + } + // read the right number of each child column + for(size_t i=0; i < numChildren; ++i) { + if (childrenReader[i] != nullptr) { + if (encoded) { + childrenReader[i]->nextEncoded(*(unionBatch.children[i]), + static_cast<uint64_t>(counts[i]), nullptr); + } else { + childrenReader[i]->next(*(unionBatch.children[i]), + static_cast<uint64_t>(counts[i]), nullptr); + } + } + } + } + + void UnionColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + rle->seek(positions.at(columnId)); + for(size_t i = 0; i < numChildren; ++i) { + if (childrenReader[i] != nullptr) { + childrenReader[i]->seekToRowGroup(positions); + } + } + } + + /** + * Destructively convert the number from zigzag encoding to the + * natural signed representation. + */ + void unZigZagInt128(Int128& value) { + bool needsNegate = value.getLowBits() & 1; + value >>= 1; + if (needsNegate) { + value.negate(); + value -= 1; + } + } + + class Decimal64ColumnReader: public ColumnReader { + public: + static const uint32_t MAX_PRECISION_64 = 18; + static const uint32_t MAX_PRECISION_128 = 38; + static const int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1]; + + protected: + std::unique_ptr<SeekableInputStream> valueStream; + int32_t precision; + int32_t scale; + const char* buffer; + const char* bufferEnd; + + std::unique_ptr<RleDecoder> scaleDecoder; + + /** + * Read the valueStream for more bytes. 
+ */ + void readBuffer() { + while (buffer == bufferEnd) { + int length; + if (!valueStream->Next(reinterpret_cast<const void**>(&buffer), + &length)) { + throw ParseError("Read past end of stream in Decimal64ColumnReader "+ + valueStream->getName()); + } + bufferEnd = buffer + length; + } + } + + void readInt64(int64_t& value, int32_t currentScale) { + value = 0; + size_t offset = 0; + while (true) { + readBuffer(); + unsigned char ch = static_cast<unsigned char>(*(buffer++)); + value |= static_cast<uint64_t>(ch & 0x7f) << offset; + offset += 7; + if (!(ch & 0x80)) { + break; + } + } + value = unZigZag(static_cast<uint64_t>(value)); + if (scale > currentScale && + static_cast<uint64_t>(scale - currentScale) <= MAX_PRECISION_64) { + value *= POWERS_OF_TEN[scale - currentScale]; + } else if (scale < currentScale && + static_cast<uint64_t>(currentScale - scale) <= MAX_PRECISION_64) { + value /= POWERS_OF_TEN[currentScale - scale]; + } else if (scale != currentScale) { + throw ParseError("Decimal scale out of range"); + } + } + + public: + Decimal64ColumnReader(const Type& type, StripeStreams& stipe); + ~Decimal64ColumnReader() override; + + uint64_t skip(uint64_t numValues) override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) override; + }; + const uint32_t Decimal64ColumnReader::MAX_PRECISION_64; + const uint32_t Decimal64ColumnReader::MAX_PRECISION_128; + const int64_t Decimal64ColumnReader::POWERS_OF_TEN[MAX_PRECISION_64 + 1]= + {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000}; + + Decimal64ColumnReader::Decimal64ColumnReader(const Type& type, + StripeStreams& stripe + ): ColumnReader(type, stripe) { + scale = static_cast<int32_t>(type.getScale()); + precision = static_cast<int32_t>(type.getPrecision()); + valueStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (valueStream == nullptr) + throw ParseError("DATA stream not found in Decimal64Column"); + buffer = nullptr; + bufferEnd = nullptr; + RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); + std::unique_ptr<SeekableInputStream> stream = + stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true); + if (stream == nullptr) + throw ParseError("SECONDARY stream not found in Decimal64Column"); + scaleDecoder = createRleDecoder(std::move(stream), true, vers, memoryPool); + } + + Decimal64ColumnReader::~Decimal64ColumnReader() { + // PASS + } + + uint64_t Decimal64ColumnReader::skip(uint64_t numValues) { + numValues = ColumnReader::skip(numValues); + uint64_t skipped = 0; + while (skipped < numValues) { + readBuffer(); + if (!(0x80 & *(buffer++))) { + skipped += 1; + } + } + scaleDecoder->skip(numValues); + return numValues; + } + + void Decimal64ColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; + Decimal64VectorBatch &batch = + dynamic_cast<Decimal64VectorBatch&>(rowBatch); + int64_t* values = batch.values.data(); + // read the next group of scales + int64_t* scaleBuffer = batch.readScales.data(); + scaleDecoder->next(scaleBuffer, numValues, notNull); + batch.precision = precision; + batch.scale = scale; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + readInt64(values[i], static_cast<int32_t>(scaleBuffer[i])); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + readInt64(values[i], static_cast<int32_t>(scaleBuffer[i])); + } + } + } + + void scaleInt128(Int128& value, uint32_t scale, uint32_t currentScale) { + if (scale > currentScale) { + while(scale > currentScale) { + uint32_t scaleAdjust = + std::min(Decimal64ColumnReader::MAX_PRECISION_64, + scale - currentScale); + value *= Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust]; + currentScale += scaleAdjust; + } + } else if (scale < currentScale) { + Int128 remainder; + while(currentScale > scale) { + uint32_t scaleAdjust = + std::min(Decimal64ColumnReader::MAX_PRECISION_64, + currentScale - scale); + value = value.divide(Decimal64ColumnReader::POWERS_OF_TEN[scaleAdjust], + remainder); + currentScale -= scaleAdjust; + } + } + } + + void Decimal64ColumnReader::seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions) { + ColumnReader::seekToRowGroup(positions); + valueStream->seek(positions.at(columnId)); + scaleDecoder->seek(positions.at(columnId)); + } + + class Decimal128ColumnReader: public Decimal64ColumnReader { + public: + Decimal128ColumnReader(const Type& type, StripeStreams& stipe); + ~Decimal128ColumnReader() override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + + private: + void readInt128(Int128& value, int32_t currentScale) { + value = 0; + Int128 work; + uint32_t offset = 0; + while (true) { + readBuffer(); + unsigned char ch = static_cast<unsigned char>(*(buffer++)); + work = ch & 0x7f; + work <<= offset; + value |= work; + offset += 7; + if (!(ch & 0x80)) { + break; + } + } + unZigZagInt128(value); + scaleInt128(value, static_cast<uint32_t>(scale), + static_cast<uint32_t>(currentScale)); + } + }; + + Decimal128ColumnReader::Decimal128ColumnReader + (const Type& type, + StripeStreams& stripe + ): Decimal64ColumnReader(type, stripe) { + // PASS + } + + Decimal128ColumnReader::~Decimal128ColumnReader() { + // PASS + } + + void Decimal128ColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; + Decimal128VectorBatch &batch = + dynamic_cast<Decimal128VectorBatch&>(rowBatch); + Int128* values = batch.values.data(); + // read the next group of scales + int64_t* scaleBuffer = batch.readScales.data(); + scaleDecoder->next(scaleBuffer, numValues, notNull); + batch.precision = precision; + batch.scale = scale; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + readInt128(values[i], static_cast<int32_t>(scaleBuffer[i])); + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + readInt128(values[i], static_cast<int32_t>(scaleBuffer[i])); + } + } + } + + class DecimalHive11ColumnReader: public Decimal64ColumnReader { + private: + bool throwOnOverflow; + std::ostream* errorStream; + + /** + * Read an Int128 from the stream and correct it to the desired scale. 
+ */ + bool readInt128(Int128& value, int32_t currentScale) { + // -/+ 99999999999999999999999999999999999999 + static const Int128 MIN_VALUE(-0x4b3b4ca85a86c47b, 0xf675ddc000000001); + static const Int128 MAX_VALUE( 0x4b3b4ca85a86c47a, 0x098a223fffffffff); + + value = 0; + Int128 work; + uint32_t offset = 0; + bool result = true; + while (true) { + readBuffer(); + unsigned char ch = static_cast<unsigned char>(*(buffer++)); + work = ch & 0x7f; + // If we have read more than 128 bits, we flag the error, but keep + // reading bytes so the stream isn't thrown off. + if (offset > 128 || (offset == 126 && work > 3)) { + result = false; + } + work <<= offset; + value |= work; + offset += 7; + if (!(ch & 0x80)) { + break; + } + } + + if (!result) { + return result; + } + unZigZagInt128(value); + scaleInt128(value, static_cast<uint32_t>(scale), + static_cast<uint32_t>(currentScale)); + return value >= MIN_VALUE && value <= MAX_VALUE; + } + + public: + DecimalHive11ColumnReader(const Type& type, StripeStreams& stipe); + ~DecimalHive11ColumnReader() override; + + void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) override; + }; + + DecimalHive11ColumnReader::DecimalHive11ColumnReader + (const Type& type, + StripeStreams& stripe + ): Decimal64ColumnReader(type, stripe) { + scale = stripe.getForcedScaleOnHive11Decimal(); + throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow(); + errorStream = stripe.getErrorStream(); + } + + DecimalHive11ColumnReader::~DecimalHive11ColumnReader() { + // PASS + } + + void DecimalHive11ColumnReader::next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char *notNull) { + ColumnReader::next(rowBatch, numValues, notNull); + notNull = rowBatch.hasNulls ? rowBatch.notNull.data() : nullptr; + Decimal128VectorBatch &batch = + dynamic_cast<Decimal128VectorBatch&>(rowBatch); + Int128* values = batch.values.data(); + // read the next group of scales + int64_t* scaleBuffer = batch.readScales.data(); + + scaleDecoder->next(scaleBuffer, numValues, notNull); + + batch.precision = precision; + batch.scale = scale; + if (notNull) { + for(size_t i=0; i < numValues; ++i) { + if (notNull[i]) { + if (!readInt128(values[i], + static_cast<int32_t>(scaleBuffer[i]))) { + if (throwOnOverflow) { + throw ParseError("Hive 0.11 decimal was more than 38 digits."); + } else { + *errorStream << "Warning: " + << "Hive 0.11 decimal with more than 38 digits " + << "replaced by NULL.\n"; + notNull[i] = false; + } + } + } + } + } else { + for(size_t i=0; i < numValues; ++i) { + if (!readInt128(values[i], + static_cast<int32_t>(scaleBuffer[i]))) { + if (throwOnOverflow) { + throw ParseError("Hive 0.11 decimal was more than 38 digits."); + } else { + *errorStream << "Warning: " + << "Hive 0.11 decimal with more than 38 digits " + << "replaced by NULL.\n"; + batch.hasNulls = true; + batch.notNull[i] = false; + } + } + } + } + } + + /** + * Create a reader for the given stripe. 
+ */ + std::unique_ptr<ColumnReader> buildReader(const Type& type, + StripeStreams& stripe) { + switch (static_cast<int64_t>(type.getKind())) { + case DATE: + case INT: + case LONG: + case SHORT: + return std::unique_ptr<ColumnReader>( + new IntegerColumnReader(type, stripe)); + case BINARY: + case CHAR: + case STRING: + case VARCHAR: + switch (static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())){ + case proto::ColumnEncoding_Kind_DICTIONARY: + case proto::ColumnEncoding_Kind_DICTIONARY_V2: + return std::unique_ptr<ColumnReader>( + new StringDictionaryColumnReader(type, stripe)); + case proto::ColumnEncoding_Kind_DIRECT: + case proto::ColumnEncoding_Kind_DIRECT_V2: + return std::unique_ptr<ColumnReader>( + new StringDirectColumnReader(type, stripe)); + default: + throw NotImplementedYet("buildReader unhandled string encoding"); + } + + case BOOLEAN: + return std::unique_ptr<ColumnReader>( + new BooleanColumnReader(type, stripe)); + + case BYTE: + return std::unique_ptr<ColumnReader>( + new ByteColumnReader(type, stripe)); + + case LIST: + return std::unique_ptr<ColumnReader>( + new ListColumnReader(type, stripe)); + + case MAP: + return std::unique_ptr<ColumnReader>( + new MapColumnReader(type, stripe)); + + case UNION: + return std::unique_ptr<ColumnReader>( + new UnionColumnReader(type, stripe)); + + case STRUCT: + return std::unique_ptr<ColumnReader>( + new StructColumnReader(type, stripe)); + + case FLOAT: + case DOUBLE: + return std::unique_ptr<ColumnReader>( + new DoubleColumnReader(type, stripe)); + + case TIMESTAMP: + return std::unique_ptr<ColumnReader> + (new TimestampColumnReader(type, stripe)); + + case DECIMAL: + // is this a Hive 0.11 or 0.12 file? + if (type.getPrecision() == 0) { + return std::unique_ptr<ColumnReader> + (new DecimalHive11ColumnReader(type, stripe)); + + // can we represent the values using int64_t? + } else if (type.getPrecision() <= + Decimal64ColumnReader::MAX_PRECISION_64) { + return std::unique_ptr<ColumnReader> + (new Decimal64ColumnReader(type, stripe)); + + // otherwise we use the Int128 implementation + } else { + return std::unique_ptr<ColumnReader> + (new Decimal128ColumnReader(type, stripe)); + } + + default: + throw NotImplementedYet("buildReader unhandled type"); + } + } + +} diff --git a/contrib/libs/apache/orc/c++/src/ColumnReader.hh b/contrib/libs/apache/orc/c++/src/ColumnReader.hh index 5023cdfab5..0c64e5b80f 100644 --- a/contrib/libs/apache/orc/c++/src/ColumnReader.hh +++ b/contrib/libs/apache/orc/c++/src/ColumnReader.hh @@ -1,156 +1,156 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_COLUMN_READER_HH -#define ORC_COLUMN_READER_HH - -#include <unordered_map> - -#include "orc/Vector.hh" - -#include "ByteRLE.hh" -#include "Compression.hh" -#include "Timezone.hh" -#include "wrap/orc-proto-wrapper.hh" - -namespace orc { - - class StripeStreams { - public: - virtual ~StripeStreams(); - - /** - * Get the array of booleans for which columns are selected. - * @return the address of an array which contains true at the index of - * each columnId is selected. - */ - virtual const std::vector<bool> getSelectedColumns() const = 0; - - /** - * Get the encoding for the given column for this stripe. - */ - virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0; - - /** - * Get the stream for the given column/kind in this stripe. - * @param columnId the id of the column - * @param kind the kind of the stream - * @param shouldStream should the reading page the stream in - * @return the new stream - */ - virtual std::unique_ptr<SeekableInputStream> - getStream(uint64_t columnId, - proto::Stream_Kind kind, - bool shouldStream) const = 0; - - /** - * Get the memory pool for this reader. - */ - virtual MemoryPool& getMemoryPool() const = 0; - - /** - * Get the writer's timezone, so that we can convert their dates correctly. - */ - virtual const Timezone& getWriterTimezone() const = 0; - - /** - * Get the error stream. - * @return a pointer to the stream that should get error messages - */ - virtual std::ostream* getErrorStream() const = 0; - - /** - * Should the reader throw when the scale overflows when reading Hive 0.11 - * decimals. - * @return true if it should throw - */ - virtual bool getThrowOnHive11DecimalOverflow() const = 0; - - /** - * What is the scale forced on the Hive 0.11 decimals? - * @return the number of scale digits - */ - virtual int32_t getForcedScaleOnHive11Decimal() const = 0; - }; - - /** - * The interface for reading ORC data types. - */ - class ColumnReader { - protected: - std::unique_ptr<ByteRleDecoder> notNullDecoder; - uint64_t columnId; - MemoryPool& memoryPool; - - public: - ColumnReader(const Type& type, StripeStreams& stipe); - - virtual ~ColumnReader(); - - /** - * Skip number of specified rows. - * @param numValues the number of values to skip - * @return the number of non-null values skipped - */ - virtual uint64_t skip(uint64_t numValues); - - /** - * Read the next group of values into this rowBatch. - * @param rowBatch the memory to read into. - * @param numValues the number of values to read - * @param notNull if null, all values are not null. Otherwise, it is - * a mask (with at least numValues bytes) for which values to - * set. - */ - virtual void next(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull); - - /** - * Read the next group of values without decoding - * @param rowBatch the memory to read into. - * @param numValues the number of values to read - * @param notNull if null, all values are not null. Otherwise, it is - * a mask (with at least numValues bytes) for which values to - * set. - */ - virtual void nextEncoded(ColumnVectorBatch& rowBatch, - uint64_t numValues, - char* notNull) - { - rowBatch.isEncoded = false; - next(rowBatch, numValues, notNull); - } - - /** - * Seek to beginning of a row group in the current stripe - * @param positions a list of PositionProviders storing the positions - */ - virtual void seekToRowGroup( - std::unordered_map<uint64_t, PositionProvider>& positions); - - }; - - /** - * Create a reader for the given stripe. 
- */ - std::unique_ptr<ColumnReader> buildReader(const Type& type, - StripeStreams& stripe); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_COLUMN_READER_HH +#define ORC_COLUMN_READER_HH + +#include <unordered_map> + +#include "orc/Vector.hh" + +#include "ByteRLE.hh" +#include "Compression.hh" +#include "Timezone.hh" +#include "wrap/orc-proto-wrapper.hh" + +namespace orc { + + class StripeStreams { + public: + virtual ~StripeStreams(); + + /** + * Get the array of booleans for which columns are selected. + * @return the address of an array which contains true at the index of + * each columnId is selected. + */ + virtual const std::vector<bool> getSelectedColumns() const = 0; + + /** + * Get the encoding for the given column for this stripe. + */ + virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0; + + /** + * Get the stream for the given column/kind in this stripe. + * @param columnId the id of the column + * @param kind the kind of the stream + * @param shouldStream should the reading page the stream in + * @return the new stream + */ + virtual std::unique_ptr<SeekableInputStream> + getStream(uint64_t columnId, + proto::Stream_Kind kind, + bool shouldStream) const = 0; + + /** + * Get the memory pool for this reader. + */ + virtual MemoryPool& getMemoryPool() const = 0; + + /** + * Get the writer's timezone, so that we can convert their dates correctly. + */ + virtual const Timezone& getWriterTimezone() const = 0; + + /** + * Get the error stream. + * @return a pointer to the stream that should get error messages + */ + virtual std::ostream* getErrorStream() const = 0; + + /** + * Should the reader throw when the scale overflows when reading Hive 0.11 + * decimals. + * @return true if it should throw + */ + virtual bool getThrowOnHive11DecimalOverflow() const = 0; + + /** + * What is the scale forced on the Hive 0.11 decimals? + * @return the number of scale digits + */ + virtual int32_t getForcedScaleOnHive11Decimal() const = 0; + }; + + /** + * The interface for reading ORC data types. + */ + class ColumnReader { + protected: + std::unique_ptr<ByteRleDecoder> notNullDecoder; + uint64_t columnId; + MemoryPool& memoryPool; + + public: + ColumnReader(const Type& type, StripeStreams& stipe); + + virtual ~ColumnReader(); + + /** + * Skip number of specified rows. + * @param numValues the number of values to skip + * @return the number of non-null values skipped + */ + virtual uint64_t skip(uint64_t numValues); + + /** + * Read the next group of values into this rowBatch. + * @param rowBatch the memory to read into. + * @param numValues the number of values to read + * @param notNull if null, all values are not null. 
Otherwise, it is + * a mask (with at least numValues bytes) for which values to + * set. + */ + virtual void next(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull); + + /** + * Read the next group of values without decoding + * @param rowBatch the memory to read into. + * @param numValues the number of values to read + * @param notNull if null, all values are not null. Otherwise, it is + * a mask (with at least numValues bytes) for which values to + * set. + */ + virtual void nextEncoded(ColumnVectorBatch& rowBatch, + uint64_t numValues, + char* notNull) + { + rowBatch.isEncoded = false; + next(rowBatch, numValues, notNull); + } + + /** + * Seek to beginning of a row group in the current stripe + * @param positions a list of PositionProviders storing the positions + */ + virtual void seekToRowGroup( + std::unordered_map<uint64_t, PositionProvider>& positions); + + }; + + /** + * Create a reader for the given stripe. + */ + std::unique_ptr<ColumnReader> buildReader(const Type& type, + StripeStreams& stripe); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/ColumnWriter.cc b/contrib/libs/apache/orc/c++/src/ColumnWriter.cc index 8d4d00cc61..1408a15457 100644 --- a/contrib/libs/apache/orc/c++/src/ColumnWriter.cc +++ b/contrib/libs/apache/orc/c++/src/ColumnWriter.cc @@ -1,3013 +1,3013 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Int128.hh" -#include "orc/Writer.hh" - -#include "ByteRLE.hh" -#include "ColumnWriter.hh" -#include "RLE.hh" -#include "Statistics.hh" -#include "Timezone.hh" - -namespace orc { - StreamsFactory::~StreamsFactory() { - //PASS - } - - class StreamsFactoryImpl : public StreamsFactory { - public: - StreamsFactoryImpl( - const WriterOptions& writerOptions, - OutputStream* outputStream) : - options(writerOptions), - outStream(outputStream) { - } - - virtual std::unique_ptr<BufferedOutputStream> - createStream(proto::Stream_Kind kind) const override; - private: - const WriterOptions& options; - OutputStream* outStream; - }; - - std::unique_ptr<BufferedOutputStream> StreamsFactoryImpl::createStream( - proto::Stream_Kind) const { - // In the future, we can decide compression strategy and modifier - // based on stream kind. 
But for now we just use the setting from - // WriterOption - return createCompressor( - options.getCompression(), - outStream, - options.getCompressionStrategy(), - // BufferedOutputStream initial capacity - 1 * 1024 * 1024, - options.getCompressionBlockSize(), - *options.getMemoryPool()); - } - - std::unique_ptr<StreamsFactory> createStreamsFactory( - const WriterOptions& options, - OutputStream* outStream) { - return std::unique_ptr<StreamsFactory>( - new StreamsFactoryImpl(options, outStream)); - } - - RowIndexPositionRecorder::~RowIndexPositionRecorder() { - // PASS - } - - proto::ColumnEncoding_Kind RleVersionMapper(RleVersion rleVersion) - { - switch (rleVersion) - { - case RleVersion_1: - return proto::ColumnEncoding_Kind_DIRECT; - case RleVersion_2: - return proto::ColumnEncoding_Kind_DIRECT_V2; - default: - throw InvalidArgument("Invalid param"); - } - } - - ColumnWriter::ColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - columnId(type.getColumnId()), - colIndexStatistics(), - colStripeStatistics(), - colFileStatistics(), - enableIndex(options.getEnableIndex()), - rowIndex(), - rowIndexEntry(), - rowIndexPosition(), - enableBloomFilter(false), - memPool(*options.getMemoryPool()), - indexStream(), - bloomFilterStream() { - - std::unique_ptr<BufferedOutputStream> presentStream = - factory.createStream(proto::Stream_Kind_PRESENT); - notNullEncoder = createBooleanRleEncoder(std::move(presentStream)); - - colIndexStatistics = createColumnStatistics(type); - colStripeStatistics = createColumnStatistics(type); - colFileStatistics = createColumnStatistics(type); - - if (enableIndex) { - rowIndex = std::unique_ptr<proto::RowIndex>(new proto::RowIndex()); - rowIndexEntry = - std::unique_ptr<proto::RowIndexEntry>(new proto::RowIndexEntry()); - rowIndexPosition = std::unique_ptr<RowIndexPositionRecorder>( - new RowIndexPositionRecorder(*rowIndexEntry)); - indexStream = - factory.createStream(proto::Stream_Kind_ROW_INDEX); - - // BloomFilters for non-UTF8 strings and non-UTC timestamps are not supported - if (options.isColumnUseBloomFilter(columnId) - && options.getBloomFilterVersion() == BloomFilterVersion::UTF8) { - enableBloomFilter = true; - bloomFilter.reset(new BloomFilterImpl( - options.getRowIndexStride(), options.getBloomFilterFPP())); - bloomFilterIndex.reset(new proto::BloomFilterIndex()); - bloomFilterStream = factory.createStream(proto::Stream_Kind_BLOOM_FILTER_UTF8); - } - } - } - - ColumnWriter::~ColumnWriter() { - // PASS - } - - void ColumnWriter::add(ColumnVectorBatch& batch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - notNullEncoder->add(batch.notNull.data() + offset, numValues, incomingMask); - } - - void ColumnWriter::flush(std::vector<proto::Stream>& streams) { - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_PRESENT); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(notNullEncoder->flush()); - streams.push_back(stream); - } - - uint64_t ColumnWriter::getEstimatedSize() const { - return notNullEncoder->getBufferSize(); - } - - void ColumnWriter::getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - getProtoBufStatistics(stats, colStripeStatistics.get()); - } - - void ColumnWriter::mergeStripeStatsIntoFileStats() { - colFileStatistics->merge(*colStripeStatistics); - colStripeStatistics->reset(); - } - - void ColumnWriter::mergeRowGroupStatsIntoStripeStats() { - colStripeStatistics->merge(*colIndexStatistics); - 
colIndexStatistics->reset(); - } - - void ColumnWriter::getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - getProtoBufStatistics(stats, colFileStatistics.get()); - } - - void ColumnWriter::createRowIndexEntry() { - proto::ColumnStatistics *indexStats = rowIndexEntry->mutable_statistics(); - colIndexStatistics->toProtoBuf(*indexStats); - - *rowIndex->add_entry() = *rowIndexEntry; - - rowIndexEntry->clear_positions(); - rowIndexEntry->clear_statistics(); - - colStripeStatistics->merge(*colIndexStatistics); - colIndexStatistics->reset(); - - addBloomFilterEntry(); - - recordPosition(); - } - - void ColumnWriter::addBloomFilterEntry() { - if (enableBloomFilter) { - BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter()); - bloomFilter->reset(); - } - } - - void ColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { - // write row index to output stream - rowIndex->SerializeToZeroCopyStream(indexStream.get()); - - // construct row index stream - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_ROW_INDEX); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(indexStream->flush()); - streams.push_back(stream); - - // write BLOOM_FILTER_UTF8 stream - if (enableBloomFilter) { - if (!bloomFilterIndex->SerializeToZeroCopyStream(bloomFilterStream.get())) { - throw std::logic_error("Failed to write bloom filter stream."); - } - stream.set_kind(proto::Stream_Kind_BLOOM_FILTER_UTF8); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(bloomFilterStream->flush()); - streams.push_back(stream); - } - } - - void ColumnWriter::recordPosition() const { - notNullEncoder->recordPosition(rowIndexPosition.get()); - } - - void ColumnWriter::reset() { - if (enableIndex) { - // clear row index - rowIndex->clear_entry(); - rowIndexEntry->clear_positions(); - rowIndexEntry->clear_statistics(); - - // write current positions - recordPosition(); - } - - if (enableBloomFilter) { - bloomFilter->reset(); - bloomFilterIndex->clear_bloomfilter(); - } - } - - void ColumnWriter::writeDictionary() { - // PASS - } - - class StructColumnWriter : public ColumnWriter { - public: - StructColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - ~StructColumnWriter() override; - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void mergeStripeStatsIntoFileStats() override; - - virtual void mergeRowGroupStatsIntoStripeStats() override; - - virtual void createRowIndexEntry() override; - - virtual void writeIndex( - std::vector<proto::Stream> &streams) const override; - - virtual void writeDictionary() override; - - virtual void reset() override; - - private: - std::vector<ColumnWriter *> children; - }; - - StructColumnWriter::StructColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options) { - for(unsigned int i = 0; i < type.getSubtypeCount(); ++i) { - const Type& child = 
*type.getSubtype(i); - children.push_back(buildWriter(child, factory, options).release()); - } - - if (enableIndex) { - recordPosition(); - } - } - - StructColumnWriter::~StructColumnWriter() { - for (uint32_t i = 0; i < children.size(); ++i) { - delete children[i]; - } - } - - void StructColumnWriter::add( - ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const StructVectorBatch* structBatch = - dynamic_cast<const StructVectorBatch *>(&rowBatch); - if (structBatch == nullptr) { - throw InvalidArgument("Failed to cast to StructVectorBatch"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - const char* notNull = structBatch->hasNulls ? - structBatch->notNull.data() + offset : nullptr; - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->add(*structBatch->fields[i], offset, numValues, notNull); - } - - // update stats - if (!notNull) { - colIndexStatistics->increase(numValues); - } else { - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull[i]) { - ++count; - } - } - colIndexStatistics->increase(count); - if (count < numValues) { - colIndexStatistics->setHasNull(true); - } - } - } - - void StructColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->flush(streams); - } - } - - void StructColumnWriter::writeIndex( - std::vector<proto::Stream> &streams) const { - ColumnWriter::writeIndex(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeIndex(streams); - } - } - - uint64_t StructColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - for (uint32_t i = 0; i < children.size(); ++i) { - size += children[i]->getEstimatedSize(); - } - return size; - } - - void StructColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); - encodings.push_back(encoding); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getColumnEncoding(encodings); - } - } - - void StructColumnWriter::getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getStripeStatistics(stats); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getStripeStatistics(stats); - } - } - - void StructColumnWriter::mergeStripeStatsIntoFileStats() { - ColumnWriter::mergeStripeStatsIntoFileStats(); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeStripeStatsIntoFileStats(); - } - } - - void StructColumnWriter::getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getFileStatistics(stats); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getFileStatistics(stats); - } - } - - void StructColumnWriter::mergeRowGroupStatsIntoStripeStats() { - ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeRowGroupStatsIntoStripeStats(); - } - } - - void StructColumnWriter::createRowIndexEntry() { - ColumnWriter::createRowIndexEntry(); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->createRowIndexEntry(); - } - } - - void StructColumnWriter::reset() { - ColumnWriter::reset(); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->reset(); - } - } - - void 
StructColumnWriter::writeDictionary() { - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeDictionary(); - } - } - - class IntegerColumnWriter : public ColumnWriter { - public: - IntegerColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - protected: - std::unique_ptr<RleEncoder> rleEncoder; - - private: - RleVersion rleVersion; - }; - - IntegerColumnWriter::IntegerColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()) { - std::unique_ptr<BufferedOutputStream> dataStream = - factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createRleEncoder( - std::move(dataStream), - true, - rleVersion, - memPool, - options.getAlignedBitpacking()); - - if (enableIndex) { - recordPosition(); - } - } - - void IntegerColumnWriter::add( - ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const LongVectorBatch* longBatch = - dynamic_cast<const LongVectorBatch*>(&rowBatch); - if (longBatch == nullptr) { - throw InvalidArgument("Failed to cast to LongVectorBatch"); - } - IntegerColumnStatisticsImpl* intStats = - dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get()); - if (intStats == nullptr) { - throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const int64_t* data = longBatch->data.data() + offset; - const char* notNull = longBatch->hasNulls ? 
- longBatch->notNull.data() + offset : nullptr; - - rleEncoder->add(data, numValues, notNull); - - // update stats - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull == nullptr || notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(data[i]); - } - intStats->update(data[i], 1); - } - } - intStats->increase(count); - if (count < numValues) { - intStats->setHasNull(true); - } - } - - void IntegerColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_DATA); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(rleEncoder->flush()); - streams.push_back(stream); - } - - uint64_t IntegerColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += rleEncoder->getBufferSize(); - return size; - } - - void IntegerColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void IntegerColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - rleEncoder->recordPosition(rowIndexPosition.get()); - } - - class ByteColumnWriter : public ColumnWriter { - public: - ByteColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - private: - std::unique_ptr<ByteRleEncoder> byteRleEncoder; - }; - - ByteColumnWriter::ByteColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options) { - std::unique_ptr<BufferedOutputStream> dataStream = - factory.createStream(proto::Stream_Kind_DATA); - byteRleEncoder = createByteRleEncoder(std::move(dataStream)); - - if (enableIndex) { - recordPosition(); - } - } - - void ByteColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch); - if (byteBatch == nullptr) { - throw InvalidArgument("Failed to cast to LongVectorBatch"); - } - IntegerColumnStatisticsImpl* intStats = - dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get()); - if (intStats == nullptr) { - throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - int64_t* data = byteBatch->data.data() + offset; - const char* notNull = byteBatch->hasNulls ? 
- byteBatch->notNull.data() + offset : nullptr; - - char* byteData = reinterpret_cast<char*>(data); - for (uint64_t i = 0; i < numValues; ++i) { - byteData[i] = static_cast<char>(data[i]); - } - byteRleEncoder->add(byteData, numValues, notNull); - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull == nullptr || notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(data[i]); - } - intStats->update(static_cast<int64_t>(byteData[i]), 1); - } - } - intStats->increase(count); - if (count < numValues) { - intStats->setHasNull(true); - } - } - - void ByteColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_DATA); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(byteRleEncoder->flush()); - streams.push_back(stream); - } - - uint64_t ByteColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += byteRleEncoder->getBufferSize(); - return size; - } - - void ByteColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void ByteColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - byteRleEncoder->recordPosition(rowIndexPosition.get()); - } - - class BooleanColumnWriter : public ColumnWriter { - public: - BooleanColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - private: - std::unique_ptr<ByteRleEncoder> rleEncoder; - }; - - BooleanColumnWriter::BooleanColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options) { - std::unique_ptr<BufferedOutputStream> dataStream = - factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createBooleanRleEncoder(std::move(dataStream)); - - if (enableIndex) { - recordPosition(); - } - } - - void BooleanColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch); - if (byteBatch == nullptr) { - throw InvalidArgument("Failed to cast to LongVectorBatch"); - } - BooleanColumnStatisticsImpl* boolStats = - dynamic_cast<BooleanColumnStatisticsImpl*>(colIndexStatistics.get()); - if (boolStats == nullptr) { - throw InvalidArgument("Failed to cast to BooleanColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - int64_t* data = byteBatch->data.data() + offset; - const char* notNull = byteBatch->hasNulls ? 
- byteBatch->notNull.data() + offset : nullptr; - - char* byteData = reinterpret_cast<char*>(data); - for (uint64_t i = 0; i < numValues; ++i) { - byteData[i] = static_cast<char>(data[i]); - } - rleEncoder->add(byteData, numValues, notNull); - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull == nullptr || notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(data[i]); - } - boolStats->update(byteData[i] != 0, 1); - } - } - boolStats->increase(count); - if (count < numValues) { - boolStats->setHasNull(true); - } - } - - void BooleanColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_DATA); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(rleEncoder->flush()); - streams.push_back(stream); - } - - uint64_t BooleanColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += rleEncoder->getBufferSize(); - return size; - } - - void BooleanColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void BooleanColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - rleEncoder->recordPosition(rowIndexPosition.get()); - } - - class DoubleColumnWriter : public ColumnWriter { - public: - DoubleColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options, - bool isFloat); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - private: - bool isFloat; - std::unique_ptr<AppendOnlyBufferedStream> dataStream; - DataBuffer<char> buffer; - }; - - DoubleColumnWriter::DoubleColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options, - bool isFloatType) : - ColumnWriter(type, factory, options), - isFloat(isFloatType), - buffer(*options.getMemoryPool()) { - dataStream.reset(new AppendOnlyBufferedStream( - factory.createStream(proto::Stream_Kind_DATA))); - buffer.resize(isFloat ? 4 : 8); - - if (enableIndex) { - recordPosition(); - } - } - - // Floating point types are stored using IEEE 754 floating point bit layout. - // Float columns use 4 bytes per value and double columns use 8 bytes. 
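The encodeFloatNum template that follows packs the IEEE 754 bit pattern of a float or double into the data stream least-significant byte first. A minimal standalone sketch of the same byte layout, with illustrative names that are not part of the ORC API:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Pack a double into 8 little-endian bytes (sketch of the writer's layout).
    static void packDoubleLE(double value, unsigned char out[8]) {
      std::uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));  // copy the raw IEEE 754 bits
      for (int i = 0; i < 8; ++i) {
        out[i] = static_cast<unsigned char>((bits >> (8 * i)) & 0xff);
      }
    }

    int main() {
      unsigned char buf[8];
      packDoubleLE(1.0, buf);  // 1.0 has bit pattern 0x3ff0000000000000
      for (unsigned char b : buf) std::printf("%02x ", b);  // 00 00 00 00 00 00 f0 3f
      std::printf("\n");
      return 0;
    }

Float columns follow the same scheme with a 32-bit integer and 4 output bytes.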
- template <typename FLOAT_TYPE, typename INTEGER_TYPE> - inline void encodeFloatNum(FLOAT_TYPE input, char* output) { - INTEGER_TYPE* intBits = reinterpret_cast<INTEGER_TYPE*>(&input); - for (size_t i = 0; i < sizeof(INTEGER_TYPE); ++i) { - output[i] = static_cast<char>(((*intBits) >> (8 * i)) & 0xff); - } - } - - void DoubleColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const DoubleVectorBatch* dblBatch = - dynamic_cast<const DoubleVectorBatch*>(&rowBatch); - if (dblBatch == nullptr) { - throw InvalidArgument("Failed to cast to DoubleVectorBatch"); - } - DoubleColumnStatisticsImpl* doubleStats = - dynamic_cast<DoubleColumnStatisticsImpl*>(colIndexStatistics.get()); - if (doubleStats == nullptr) { - throw InvalidArgument("Failed to cast to DoubleColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const double* doubleData = dblBatch->data.data() + offset; - const char* notNull = dblBatch->hasNulls ? - dblBatch->notNull.data() + offset : nullptr; - - size_t bytes = isFloat ? 4 : 8; - char* data = buffer.data(); - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - if (isFloat) { - encodeFloatNum<float, int32_t>(static_cast<float>(doubleData[i]), data); - } else { - encodeFloatNum<double, int64_t>(doubleData[i], data); - } - dataStream->write(data, bytes); - ++count; - if (enableBloomFilter) { - bloomFilter->addDouble(doubleData[i]); - } - doubleStats->update(doubleData[i]); - } - } - doubleStats->increase(count); - if (count < numValues) { - doubleStats->setHasNull(true); - } - } - - void DoubleColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_DATA); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(dataStream->flush()); - streams.push_back(stream); - } - - uint64_t DoubleColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += dataStream->getSize(); - return size; - } - - void DoubleColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void DoubleColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - dataStream->recordPosition(rowIndexPosition.get()); - } - - /** - * Implementation of increasing sorted string dictionary - */ - class SortedStringDictionary { - public: - struct DictEntry { - DictEntry(const char * str, size_t len):data(str),length(len) {} - const char * data; - size_t length; - }; - - SortedStringDictionary():totalLength(0) {} - - // insert a new string into dictionary, return its insertion order - size_t insert(const char * data, size_t len); - - // write dictionary data & length to output buffer - void flush(AppendOnlyBufferedStream * dataStream, - RleEncoder * lengthEncoder) const; - - // reorder input index buffer from insertion order to dictionary order - void reorder(std::vector<int64_t>& idxBuffer) const; - - // get dict entries in insertion order - void getEntriesInInsertionOrder(std::vector<const DictEntry *>&) const; - - // return count of entries - size_t size() const; - - // return total length of strings in the dictioanry - 
uint64_t length() const; - - void clear(); - - private: - struct LessThan { - bool operator()(const DictEntry& left, const DictEntry& right) const { - int ret = memcmp(left.data, right.data, std::min(left.length, right.length)); - if (ret != 0) { - return ret < 0; - } - return left.length < right.length; - } - }; - - std::map<DictEntry, size_t, LessThan> dict; - std::vector<std::vector<char>> data; - uint64_t totalLength; - - // use friend class here to avoid being bothered by const function calls - friend class StringColumnWriter; - friend class CharColumnWriter; - friend class VarCharColumnWriter; - // store indexes of insertion order in the dictionary for not-null rows - std::vector<int64_t> idxInDictBuffer; - }; - - // insert a new string into dictionary, return its insertion order - size_t SortedStringDictionary::insert(const char * str, size_t len) { - auto ret = dict.insert({DictEntry(str, len), dict.size()}); - if (ret.second) { - // make a copy to internal storage - data.push_back(std::vector<char>(len)); - memcpy(data.back().data(), str, len); - // update dictionary entry to link pointer to internal storage - DictEntry * entry = const_cast<DictEntry *>(&(ret.first->first)); - entry->data = data.back().data(); - totalLength += len; - } - return ret.first->second; - } - - // write dictionary data & length to output buffer - void SortedStringDictionary::flush(AppendOnlyBufferedStream * dataStream, - RleEncoder * lengthEncoder) const { - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { - dataStream->write(it->first.data, it->first.length); - lengthEncoder->write(static_cast<int64_t>(it->first.length)); - } - } - - /** - * Reorder input index buffer from insertion order to dictionary order - * - * We require this function because string values are buffered by indexes - * in their insertion order. Until the entire dictionary is complete can - * we get their sorted indexes in the dictionary in that ORC specification - * demands dictionary should be ordered. Therefore this function transforms - * the indexes from insertion order to dictionary value order for final - * output. 
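As a concrete example of this reordering (hypothetical values, not ORC code): rows are buffered with the id each string received when it was first inserted, and once the dictionary is complete those ids are remapped to positions in sorted key order.

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Rows arrive as: "pear", "apple", "pear", "banana"
      // Insertion-order ids:  0,      1,      0,      2
      std::vector<long> idx = {0, 1, 0, 2};
      std::map<std::string, size_t> dict = {{"pear", 0}, {"apple", 1}, {"banana", 2}};

      // Build mapping from insertion order to sorted (dictionary) order.
      std::vector<size_t> mapping(dict.size());
      size_t sortedPos = 0;
      for (const auto& kv : dict) {  // std::map iterates in key order
        mapping[kv.second] = sortedPos++;
      }
      for (auto& id : idx) {
        id = static_cast<long>(mapping[static_cast<size_t>(id)]);
      }
      // Sorted dictionary: apple=0, banana=1, pear=2, so idx becomes 2 0 2 1
      for (long id : idx) std::printf("%ld ", id);
      std::printf("\n");
      return 0;
    }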
- */ - void SortedStringDictionary::reorder(std::vector<int64_t>& idxBuffer) const { - // iterate the dictionary to get mapping from insertion order to value order - std::vector<size_t> mapping(dict.size()); - size_t dictIdx = 0; - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { - mapping[it->second] = dictIdx++; - } - - // do the transformation - for (size_t i = 0; i != idxBuffer.size(); ++i) { - idxBuffer[i] = static_cast<int64_t>( - mapping[static_cast<size_t>(idxBuffer[i])]); - } - } - - // get dict entries in insertion order - void SortedStringDictionary::getEntriesInInsertionOrder( - std::vector<const DictEntry *>& entries) const { - entries.resize(dict.size()); - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { - entries[it->second] = &(it->first); - } - } - - // return count of entries - size_t SortedStringDictionary::size() const { - return dict.size(); - } - - // return total length of strings in the dictioanry - uint64_t SortedStringDictionary::length() const { - return totalLength; - } - - void SortedStringDictionary::clear() { - totalLength = 0; - data.clear(); - dict.clear(); - } - - class StringColumnWriter : public ColumnWriter { - public: - StringColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - virtual void createRowIndexEntry() override; - - virtual void writeDictionary() override; - - virtual void reset() override; - - private: - /** - * dictionary related functions - */ - bool checkDictionaryKeyRatio(); - void createDirectStreams(); - void createDictStreams(); - void deleteDictStreams(); - void fallbackToDirectEncoding(); - - protected: - RleVersion rleVersion; - bool useCompression; - const StreamsFactory& streamsFactory; - bool alignedBitPacking; - - // direct encoding streams - std::unique_ptr<RleEncoder> directLengthEncoder; - std::unique_ptr<AppendOnlyBufferedStream> directDataStream; - - // dictionary encoding streams - std::unique_ptr<RleEncoder> dictDataEncoder; - std::unique_ptr<RleEncoder> dictLengthEncoder; - std::unique_ptr<AppendOnlyBufferedStream> dictStream; - - /** - * dictionary related variables - */ - SortedStringDictionary dictionary; - // whether or not dictionary checking is done - bool doneDictionaryCheck; - // whether or not it should be used - bool useDictionary; - // keys in the dictionary should not exceed this ratio - double dictSizeThreshold; - - // record start row of each row group; null rows are skipped - mutable std::vector<size_t> startOfRowGroups; - }; - - StringColumnWriter::StringColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()), - useCompression(options.getCompression() != CompressionKind_NONE), - streamsFactory(factory), - alignedBitPacking(options.getAlignedBitpacking()), - doneDictionaryCheck(false), - useDictionary(options.getEnableDictionary()), - dictSizeThreshold(options.getDictionaryKeySizeThreshold()){ - if (type.getKind() == TypeKind::BINARY) { - useDictionary = false; - doneDictionaryCheck = true; - } - - if (useDictionary) { - 
createDictStreams(); - } else { - doneDictionaryCheck = true; - createDirectStreams(); - } - - if (enableIndex) { - recordPosition(); - } - } - - void StringColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const StringVectorBatch* stringBatch = - dynamic_cast<const StringVectorBatch*>(&rowBatch); - if (stringBatch == nullptr) { - throw InvalidArgument("Failed to cast to StringVectorBatch"); - } - - StringColumnStatisticsImpl* strStats = - dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); - if (strStats == nullptr) { - throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - char *const * data = stringBatch->data.data() + offset; - const int64_t* length = stringBatch->length.data() + offset; - const char* notNull = stringBatch->hasNulls ? - stringBatch->notNull.data() + offset : nullptr; - - if (!useDictionary){ - directLengthEncoder->add(length, numValues, notNull); - } - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - const size_t len = static_cast<size_t>(length[i]); - if (useDictionary) { - size_t index = dictionary.insert(data[i], len); - dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); - } else { - directDataStream->write(data[i], len); - } - if (enableBloomFilter) { - bloomFilter->addBytes(data[i], static_cast<int64_t>(len)); - } - strStats->update(data[i], len); - ++count; - } - } - strStats->increase(count); - if (count < numValues) { - strStats->setHasNull(true); - } - } - - void StringColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - if (useDictionary) { - proto::Stream data; - data.set_kind(proto::Stream_Kind_DATA); - data.set_column(static_cast<uint32_t>(columnId)); - data.set_length(dictDataEncoder->flush()); - streams.push_back(data); - - proto::Stream dict; - dict.set_kind(proto::Stream_Kind_DICTIONARY_DATA); - dict.set_column(static_cast<uint32_t>(columnId)); - dict.set_length(dictStream->flush()); - streams.push_back(dict); - - proto::Stream length; - length.set_kind(proto::Stream_Kind_LENGTH); - length.set_column(static_cast<uint32_t>(columnId)); - length.set_length(dictLengthEncoder->flush()); - streams.push_back(length); - } else { - proto::Stream length; - length.set_kind(proto::Stream_Kind_LENGTH); - length.set_column(static_cast<uint32_t>(columnId)); - length.set_length(directLengthEncoder->flush()); - streams.push_back(length); - - proto::Stream data; - data.set_kind(proto::Stream_Kind_DATA); - data.set_column(static_cast<uint32_t>(columnId)); - data.set_length(directDataStream->flush()); - streams.push_back(data); - } - } - - uint64_t StringColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - if (!useDictionary) { - size += directLengthEncoder->getBufferSize(); - size += directDataStream->getSize(); - } else { - size += dictionary.length(); - size += dictionary.size() * sizeof(int32_t); - size += dictionary.idxInDictBuffer.size() * sizeof(int32_t); - if (useCompression) { - size /= 3; // estimated ratio is 3:1 - } - } - return size; - } - - void StringColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - if (!useDictionary) { - encoding.set_kind(rleVersion == RleVersion_1 ? 
- proto::ColumnEncoding_Kind_DIRECT : - proto::ColumnEncoding_Kind_DIRECT_V2); - } else { - encoding.set_kind(rleVersion == RleVersion_1 ? - proto::ColumnEncoding_Kind_DICTIONARY : - proto::ColumnEncoding_Kind_DICTIONARY_V2); - } - encoding.set_dictionarysize(static_cast<uint32_t>(dictionary.size())); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void StringColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - if (!useDictionary) { - directDataStream->recordPosition(rowIndexPosition.get()); - directLengthEncoder->recordPosition(rowIndexPosition.get()); - } else { - if (enableIndex) { - startOfRowGroups.push_back(dictionary.idxInDictBuffer.size()); - } - } - } - - bool StringColumnWriter::checkDictionaryKeyRatio() { - if (!doneDictionaryCheck) { - useDictionary = dictionary.size() <= static_cast<size_t>( - static_cast<double>(dictionary.idxInDictBuffer.size()) * dictSizeThreshold); - doneDictionaryCheck = true; - } - - return useDictionary; - } - - void StringColumnWriter::createRowIndexEntry() { - if (useDictionary && !doneDictionaryCheck) { - if (!checkDictionaryKeyRatio()) { - fallbackToDirectEncoding(); - } - } - ColumnWriter::createRowIndexEntry(); - } - - void StringColumnWriter::reset() { - ColumnWriter::reset(); - - dictionary.clear(); - dictionary.idxInDictBuffer.resize(0); - startOfRowGroups.clear(); - startOfRowGroups.push_back(0); - } - - void StringColumnWriter::createDirectStreams() { - std::unique_ptr<BufferedOutputStream> directLengthStream = - streamsFactory.createStream(proto::Stream_Kind_LENGTH); - directLengthEncoder = createRleEncoder(std::move(directLengthStream), - false, - rleVersion, - memPool, - alignedBitPacking); - directDataStream.reset(new AppendOnlyBufferedStream( - streamsFactory.createStream(proto::Stream_Kind_DATA))); - } - - void StringColumnWriter::createDictStreams() { - std::unique_ptr<BufferedOutputStream> dictDataStream = - streamsFactory.createStream(proto::Stream_Kind_DATA); - dictDataEncoder = createRleEncoder(std::move(dictDataStream), - false, - rleVersion, - memPool, - alignedBitPacking); - std::unique_ptr<BufferedOutputStream> dictLengthStream = - streamsFactory.createStream(proto::Stream_Kind_LENGTH); - dictLengthEncoder = createRleEncoder(std::move(dictLengthStream), - false, - rleVersion, - memPool, - alignedBitPacking); - dictStream.reset(new AppendOnlyBufferedStream( - streamsFactory.createStream(proto::Stream_Kind_DICTIONARY_DATA))); - } - - void StringColumnWriter::deleteDictStreams() { - dictDataEncoder.reset(nullptr); - dictLengthEncoder.reset(nullptr); - dictStream.reset(nullptr); - - dictionary.clear(); - dictionary.idxInDictBuffer.clear(); - startOfRowGroups.clear(); - } - - void StringColumnWriter::writeDictionary() { - if (useDictionary && !doneDictionaryCheck) { - // when index is disabled, dictionary check happens while writing 1st stripe - if (!checkDictionaryKeyRatio()) { - fallbackToDirectEncoding(); - return; - } - } - - if (useDictionary) { - // flush dictionary data & length streams - dictionary.flush(dictStream.get(), dictLengthEncoder.get()); - - // convert index from insertion order to dictionary order - dictionary.reorder(dictionary.idxInDictBuffer); - - // write data sequences - int64_t * data = dictionary.idxInDictBuffer.data(); - if (enableIndex) { - size_t prevOffset = 0; - for (size_t i = 0; i < startOfRowGroups.size(); ++i) { - // write sequences in batch for a row group stride - size_t offset = 
startOfRowGroups[i]; - dictDataEncoder->add(data + prevOffset, offset - prevOffset, nullptr); - - // update index positions - int rowGroupId = static_cast<int>(i); - proto::RowIndexEntry* indexEntry = - (rowGroupId < rowIndex->entry_size()) ? - rowIndex->mutable_entry(rowGroupId) : rowIndexEntry.get(); - - // add positions for direct streams - RowIndexPositionRecorder recorder(*indexEntry); - dictDataEncoder->recordPosition(&recorder); - - prevOffset = offset; - } - - dictDataEncoder->add(data + prevOffset, - dictionary.idxInDictBuffer.size() - prevOffset, - nullptr); - } else { - dictDataEncoder->add(data, dictionary.idxInDictBuffer.size(), nullptr); - } - } - } - - void StringColumnWriter::fallbackToDirectEncoding() { - createDirectStreams(); - - if (enableIndex) { - // fallback happens at the 1st row group; - // simply complete positions for direct streams - proto::RowIndexEntry * indexEntry = rowIndexEntry.get(); - RowIndexPositionRecorder recorder(*indexEntry); - directDataStream->recordPosition(&recorder); - directLengthEncoder->recordPosition(&recorder); - } - - // get dictionary entries in insertion order - std::vector<const SortedStringDictionary::DictEntry *> entries; - dictionary.getEntriesInInsertionOrder(entries); - - // store each length of the data into a vector - const SortedStringDictionary::DictEntry * dictEntry = nullptr; - for (uint64_t i = 0; i != dictionary.idxInDictBuffer.size(); ++i) { - // write one row data in direct encoding - dictEntry = entries[static_cast<size_t>(dictionary.idxInDictBuffer[i])]; - directDataStream->write(dictEntry->data, dictEntry->length); - directLengthEncoder->write(static_cast<int64_t>(dictEntry->length)); - } - - deleteDictStreams(); - } - - struct Utf8Utils { - /** - * Counts how many utf-8 chars of the input data - */ - static uint64_t charLength(const char * data, uint64_t length) { - uint64_t chars = 0; - for (uint64_t i = 0; i < length; i++) { - if (isUtfStartByte(data[i])) { - chars++; - } - } - return chars; - } - - /** - * Return the number of bytes required to read at most maxCharLength - * characters in full from a utf-8 encoded byte array provided - * by data. This does not validate utf-8 data, but - * operates correctly on already valid utf-8 data. - * - * @param maxCharLength number of characters required - * @param data the bytes of UTF-8 - * @param length the length of data to truncate - */ - static uint64_t truncateBytesTo(uint64_t maxCharLength, - const char * data, - uint64_t length) { - uint64_t chars = 0; - if (length <= maxCharLength) { - return length; - } - for (uint64_t i = 0; i < length; i++) { - if (isUtfStartByte(data[i])) { - chars++; - } - if (chars > maxCharLength) { - return i; - } - } - // everything fits - return length; - } - - /** - * Checks if b is the first byte of a UTF-8 character. - */ - inline static bool isUtfStartByte(char b) { - return (b & 0xC0) != 0x80; - } - - /** - * Find the start of the last character that ends in the current string. 
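A small usage sketch of the truncation rule implemented by isUtfStartByte and truncateBytesTo above (the names below are illustrative, not part of Utf8Utils): a byte starts a character unless its top two bits are 10, so counting start bytes counts characters, and truncation stops just before the start byte of character maxCharLength + 1.

    #include <cstdint>
    #include <cstdio>
    #include <string>

    static bool isUtf8Start(char b) { return (b & 0xC0) != 0x80; }

    // Return the byte length that keeps at most maxChars complete UTF-8 characters.
    static std::uint64_t truncateTo(std::uint64_t maxChars, const std::string& s) {
      std::uint64_t chars = 0;
      for (std::uint64_t i = 0; i < s.size(); ++i) {
        if (isUtf8Start(s[i])) ++chars;
        if (chars > maxChars) return i;  // i is where character maxChars + 1 begins
      }
      return s.size();  // everything fits
    }

    int main() {
      std::string s = "a\xC3\xA9z";  // "aez" with an accented e: 4 bytes, 3 characters
      // Keep at most 2 characters: the first 3 bytes survive, 'z' is cut off.
      std::printf("%llu\n", static_cast<unsigned long long>(truncateTo(2, s)));  // 3
      return 0;
    }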
- * @param text the bytes of the utf-8 - * @param from the first byte location - * @param until the last byte location - * @return the index of the last character - */ - static uint64_t findLastCharacter(const char * text, uint64_t from, uint64_t until) { - uint64_t posn = until; - /* we don't expect characters more than 5 bytes */ - while (posn >= from) { - if (isUtfStartByte(text[posn])) { - return posn; - } - posn -= 1; - } - /* beginning of a valid char not found */ - throw std::logic_error( - "Could not truncate string, beginning of a valid char not found"); - } - }; - - class CharColumnWriter : public StringColumnWriter { - public: - CharColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - StringColumnWriter(type, factory, options), - maxLength(type.getMaximumLength()), - padBuffer(*options.getMemoryPool()) { - // utf-8 is currently 4 bytes long, but it could be up to 6 - padBuffer.resize(maxLength * 6); - } - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - private: - uint64_t maxLength; - DataBuffer<char> padBuffer; - }; - - void CharColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); - if (charsBatch == nullptr) { - throw InvalidArgument("Failed to cast to StringVectorBatch"); - } - - StringColumnStatisticsImpl* strStats = - dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); - if (strStats == nullptr) { - throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - char** data = charsBatch->data.data() + offset; - int64_t* length = charsBatch->length.data() + offset; - const char* notNull = charsBatch->hasNulls ? 
- charsBatch->notNull.data() + offset : nullptr; - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - const char * charData = nullptr; - uint64_t originLength = static_cast<uint64_t>(length[i]); - uint64_t charLength = Utf8Utils::charLength(data[i], originLength); - if (charLength >= maxLength) { - charData = data[i]; - length[i] = static_cast<int64_t>( - Utf8Utils::truncateBytesTo(maxLength, data[i], originLength)); - } else { - charData = padBuffer.data(); - // the padding is exactly 1 byte per char - length[i] = length[i] + static_cast<int64_t>(maxLength - charLength); - memcpy(padBuffer.data(), data[i], originLength); - memset(padBuffer.data() + originLength, - ' ', - static_cast<size_t>(length[i]) - originLength); - } - - if (useDictionary) { - size_t index = dictionary.insert(charData, static_cast<size_t>(length[i])); - dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); - } else { - directDataStream->write(charData, static_cast<size_t>(length[i])); - } - - if (enableBloomFilter) { - bloomFilter->addBytes(data[i], length[i]); - } - strStats->update(charData, static_cast<size_t>(length[i])); - ++count; - } - } - - if (!useDictionary) { - directLengthEncoder->add(length, numValues, notNull); - } - - strStats->increase(count); - if (count < numValues) { - strStats->setHasNull(true); - } - } - - class VarCharColumnWriter : public StringColumnWriter { - public: - VarCharColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - StringColumnWriter(type, factory, options), - maxLength(type.getMaximumLength()) { - // PASS - } - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - private: - uint64_t maxLength; - }; - - void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); - if (charsBatch == nullptr) { - throw InvalidArgument("Failed to cast to StringVectorBatch"); - } - - StringColumnStatisticsImpl* strStats = - dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); - if (strStats == nullptr) { - throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - char* const* data = charsBatch->data.data() + offset; - int64_t* length = charsBatch->length.data() + offset; - const char* notNull = charsBatch->hasNulls ? 
- charsBatch->notNull.data() + offset : nullptr; - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - uint64_t itemLength = Utf8Utils::truncateBytesTo( - maxLength, data[i], static_cast<uint64_t>(length[i])); - length[i] = static_cast<int64_t>(itemLength); - - if (useDictionary) { - size_t index = dictionary.insert(data[i], static_cast<size_t>(length[i])); - dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); - } else { - directDataStream->write(data[i], static_cast<size_t>(length[i])); - } - - if (enableBloomFilter) { - bloomFilter->addBytes(data[i], length[i]); - } - strStats->update(data[i], static_cast<size_t>(length[i])); - ++count; - } - } - - if (!useDictionary) { - directLengthEncoder->add(length, numValues, notNull); - } - - strStats->increase(count); - if (count < numValues) { - strStats->setHasNull(true); - } - } - - class BinaryColumnWriter : public StringColumnWriter { - public: - BinaryColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - StringColumnWriter(type, factory, options) { - // PASS - } - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - }; - - void BinaryColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - StringVectorBatch* binBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); - if (binBatch == nullptr) { - throw InvalidArgument("Failed to cast to StringVectorBatch"); - } - - BinaryColumnStatisticsImpl* binStats = - dynamic_cast<BinaryColumnStatisticsImpl*>(colIndexStatistics.get()); - if (binStats == nullptr) { - throw InvalidArgument("Failed to cast to BinaryColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - char** data = binBatch->data.data() + offset; - int64_t* length = binBatch->length.data() + offset; - const char* notNull = binBatch->hasNulls ? 
- binBatch->notNull.data() + offset : nullptr; - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - uint64_t unsignedLength = static_cast<uint64_t>(length[i]); - if (!notNull || notNull[i]) { - directDataStream->write(data[i], unsignedLength); - - binStats->update(unsignedLength); - ++count; - } - } - directLengthEncoder->add(length, numValues, notNull); - binStats->increase(count); - if (count < numValues) { - binStats->setHasNull(true); - } - } - - class TimestampColumnWriter : public ColumnWriter { - public: - TimestampColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - protected: - std::unique_ptr<RleEncoder> secRleEncoder, nanoRleEncoder; - - private: - RleVersion rleVersion; - const Timezone& timezone; - }; - - TimestampColumnWriter::TimestampColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()), - timezone(getTimezoneByName("GMT")){ - std::unique_ptr<BufferedOutputStream> dataStream = - factory.createStream(proto::Stream_Kind_DATA); - std::unique_ptr<BufferedOutputStream> secondaryStream = - factory.createStream(proto::Stream_Kind_SECONDARY); - secRleEncoder = createRleEncoder(std::move(dataStream), - true, - rleVersion, - memPool, - options.getAlignedBitpacking()); - nanoRleEncoder = createRleEncoder(std::move(secondaryStream), - false, - rleVersion, - memPool, - options.getAlignedBitpacking()); - - if (enableIndex) { - recordPosition(); - } - } - - // Because the number of nanoseconds often has a large number of trailing zeros, - // the number has trailing decimal zero digits removed and the last three bits - // are used to record how many zeros were removed if the trailing zeros are - // more than 2. Thus 1000 nanoseconds would be serialized as 0x0a and - // 100000 would be serialized as 0x0c. - static int64_t formatNano(int64_t nanos) { - if (nanos == 0) { - return 0; - } else if (nanos % 100 != 0) { - return (nanos) << 3; - } else { - nanos /= 100; - int64_t trailingZeros = 1; - while (nanos % 10 == 0 && trailingZeros < 7) { - nanos /= 10; - trailingZeros += 1; - } - return (nanos) << 3 | trailingZeros; - } - } - - void TimestampColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - TimestampVectorBatch* tsBatch = - dynamic_cast<TimestampVectorBatch*>(&rowBatch); - if (tsBatch == nullptr) { - throw InvalidArgument("Failed to cast to TimestampVectorBatch"); - } - - TimestampColumnStatisticsImpl* tsStats = - dynamic_cast<TimestampColumnStatisticsImpl*>(colIndexStatistics.get()); - if (tsStats == nullptr) { - throw InvalidArgument("Failed to cast to TimestampColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const char* notNull = tsBatch->hasNulls ? 
- tsBatch->notNull.data() + offset : nullptr; - int64_t *secs = tsBatch->data.data() + offset; - int64_t *nanos = tsBatch->nanoseconds.data() + offset; - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull == nullptr || notNull[i]) { - // TimestampVectorBatch already stores data in UTC - int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000; - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(millsUTC); - } - tsStats->update(millsUTC); - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/Int128.hh" +#include "orc/Writer.hh" + +#include "ByteRLE.hh" +#include "ColumnWriter.hh" +#include "RLE.hh" +#include "Statistics.hh" +#include "Timezone.hh" + +namespace orc { + StreamsFactory::~StreamsFactory() { + //PASS + } + + class StreamsFactoryImpl : public StreamsFactory { + public: + StreamsFactoryImpl( + const WriterOptions& writerOptions, + OutputStream* outputStream) : + options(writerOptions), + outStream(outputStream) { + } + + virtual std::unique_ptr<BufferedOutputStream> + createStream(proto::Stream_Kind kind) const override; + private: + const WriterOptions& options; + OutputStream* outStream; + }; + + std::unique_ptr<BufferedOutputStream> StreamsFactoryImpl::createStream( + proto::Stream_Kind) const { + // In the future, we can decide compression strategy and modifier + // based on stream kind. 
But for now we just use the setting from + // WriterOption + return createCompressor( + options.getCompression(), + outStream, + options.getCompressionStrategy(), + // BufferedOutputStream initial capacity + 1 * 1024 * 1024, + options.getCompressionBlockSize(), + *options.getMemoryPool()); + } + + std::unique_ptr<StreamsFactory> createStreamsFactory( + const WriterOptions& options, + OutputStream* outStream) { + return std::unique_ptr<StreamsFactory>( + new StreamsFactoryImpl(options, outStream)); + } + + RowIndexPositionRecorder::~RowIndexPositionRecorder() { + // PASS + } + + proto::ColumnEncoding_Kind RleVersionMapper(RleVersion rleVersion) + { + switch (rleVersion) + { + case RleVersion_1: + return proto::ColumnEncoding_Kind_DIRECT; + case RleVersion_2: + return proto::ColumnEncoding_Kind_DIRECT_V2; + default: + throw InvalidArgument("Invalid param"); + } + } + + ColumnWriter::ColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + columnId(type.getColumnId()), + colIndexStatistics(), + colStripeStatistics(), + colFileStatistics(), + enableIndex(options.getEnableIndex()), + rowIndex(), + rowIndexEntry(), + rowIndexPosition(), + enableBloomFilter(false), + memPool(*options.getMemoryPool()), + indexStream(), + bloomFilterStream() { + + std::unique_ptr<BufferedOutputStream> presentStream = + factory.createStream(proto::Stream_Kind_PRESENT); + notNullEncoder = createBooleanRleEncoder(std::move(presentStream)); + + colIndexStatistics = createColumnStatistics(type); + colStripeStatistics = createColumnStatistics(type); + colFileStatistics = createColumnStatistics(type); + + if (enableIndex) { + rowIndex = std::unique_ptr<proto::RowIndex>(new proto::RowIndex()); + rowIndexEntry = + std::unique_ptr<proto::RowIndexEntry>(new proto::RowIndexEntry()); + rowIndexPosition = std::unique_ptr<RowIndexPositionRecorder>( + new RowIndexPositionRecorder(*rowIndexEntry)); + indexStream = + factory.createStream(proto::Stream_Kind_ROW_INDEX); + + // BloomFilters for non-UTF8 strings and non-UTC timestamps are not supported + if (options.isColumnUseBloomFilter(columnId) + && options.getBloomFilterVersion() == BloomFilterVersion::UTF8) { + enableBloomFilter = true; + bloomFilter.reset(new BloomFilterImpl( + options.getRowIndexStride(), options.getBloomFilterFPP())); + bloomFilterIndex.reset(new proto::BloomFilterIndex()); + bloomFilterStream = factory.createStream(proto::Stream_Kind_BLOOM_FILTER_UTF8); + } + } + } + + ColumnWriter::~ColumnWriter() { + // PASS + } + + void ColumnWriter::add(ColumnVectorBatch& batch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + notNullEncoder->add(batch.notNull.data() + offset, numValues, incomingMask); + } + + void ColumnWriter::flush(std::vector<proto::Stream>& streams) { + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_PRESENT); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(notNullEncoder->flush()); + streams.push_back(stream); + } + + uint64_t ColumnWriter::getEstimatedSize() const { + return notNullEncoder->getBufferSize(); + } + + void ColumnWriter::getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + getProtoBufStatistics(stats, colStripeStatistics.get()); + } + + void ColumnWriter::mergeStripeStatsIntoFileStats() { + colFileStatistics->merge(*colStripeStatistics); + colStripeStatistics->reset(); + } + + void ColumnWriter::mergeRowGroupStatsIntoStripeStats() { + colStripeStatistics->merge(*colIndexStatistics); + 
colIndexStatistics->reset(); + } + + void ColumnWriter::getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + getProtoBufStatistics(stats, colFileStatistics.get()); + } + + void ColumnWriter::createRowIndexEntry() { + proto::ColumnStatistics *indexStats = rowIndexEntry->mutable_statistics(); + colIndexStatistics->toProtoBuf(*indexStats); + + *rowIndex->add_entry() = *rowIndexEntry; + + rowIndexEntry->clear_positions(); + rowIndexEntry->clear_statistics(); + + colStripeStatistics->merge(*colIndexStatistics); + colIndexStatistics->reset(); + + addBloomFilterEntry(); + + recordPosition(); + } + + void ColumnWriter::addBloomFilterEntry() { + if (enableBloomFilter) { + BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter()); + bloomFilter->reset(); + } + } + + void ColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { + // write row index to output stream + rowIndex->SerializeToZeroCopyStream(indexStream.get()); + + // construct row index stream + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_ROW_INDEX); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(indexStream->flush()); + streams.push_back(stream); + + // write BLOOM_FILTER_UTF8 stream + if (enableBloomFilter) { + if (!bloomFilterIndex->SerializeToZeroCopyStream(bloomFilterStream.get())) { + throw std::logic_error("Failed to write bloom filter stream."); + } + stream.set_kind(proto::Stream_Kind_BLOOM_FILTER_UTF8); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(bloomFilterStream->flush()); + streams.push_back(stream); + } + } + + void ColumnWriter::recordPosition() const { + notNullEncoder->recordPosition(rowIndexPosition.get()); + } + + void ColumnWriter::reset() { + if (enableIndex) { + // clear row index + rowIndex->clear_entry(); + rowIndexEntry->clear_positions(); + rowIndexEntry->clear_statistics(); + + // write current positions + recordPosition(); + } + + if (enableBloomFilter) { + bloomFilter->reset(); + bloomFilterIndex->clear_bloomfilter(); + } + } + + void ColumnWriter::writeDictionary() { + // PASS + } + + class StructColumnWriter : public ColumnWriter { + public: + StructColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + ~StructColumnWriter() override; + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void mergeStripeStatsIntoFileStats() override; + + virtual void mergeRowGroupStatsIntoStripeStats() override; + + virtual void createRowIndexEntry() override; + + virtual void writeIndex( + std::vector<proto::Stream> &streams) const override; + + virtual void writeDictionary() override; + + virtual void reset() override; + + private: + std::vector<ColumnWriter *> children; + }; + + StructColumnWriter::StructColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options) { + for(unsigned int i = 0; i < type.getSubtypeCount(); ++i) { + const Type& child = 
*type.getSubtype(i); + children.push_back(buildWriter(child, factory, options).release()); + } + + if (enableIndex) { + recordPosition(); + } + } + + StructColumnWriter::~StructColumnWriter() { + for (uint32_t i = 0; i < children.size(); ++i) { + delete children[i]; + } + } + + void StructColumnWriter::add( + ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const StructVectorBatch* structBatch = + dynamic_cast<const StructVectorBatch *>(&rowBatch); + if (structBatch == nullptr) { + throw InvalidArgument("Failed to cast to StructVectorBatch"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + const char* notNull = structBatch->hasNulls ? + structBatch->notNull.data() + offset : nullptr; + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->add(*structBatch->fields[i], offset, numValues, notNull); + } + + // update stats + if (!notNull) { + colIndexStatistics->increase(numValues); + } else { + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull[i]) { + ++count; + } + } + colIndexStatistics->increase(count); + if (count < numValues) { + colIndexStatistics->setHasNull(true); + } + } + } + + void StructColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->flush(streams); + } + } + + void StructColumnWriter::writeIndex( + std::vector<proto::Stream> &streams) const { + ColumnWriter::writeIndex(streams); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->writeIndex(streams); + } + } + + uint64_t StructColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + for (uint32_t i = 0; i < children.size(); ++i) { + size += children[i]->getEstimatedSize(); + } + return size; + } + + void StructColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); + encoding.set_dictionarysize(0); + encodings.push_back(encoding); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getColumnEncoding(encodings); + } + } + + void StructColumnWriter::getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getStripeStatistics(stats); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getStripeStatistics(stats); + } + } + + void StructColumnWriter::mergeStripeStatsIntoFileStats() { + ColumnWriter::mergeStripeStatsIntoFileStats(); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->mergeStripeStatsIntoFileStats(); + } + } + + void StructColumnWriter::getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getFileStatistics(stats); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getFileStatistics(stats); + } + } + + void StructColumnWriter::mergeRowGroupStatsIntoStripeStats() { + ColumnWriter::mergeRowGroupStatsIntoStripeStats(); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->mergeRowGroupStatsIntoStripeStats(); + } + } + + void StructColumnWriter::createRowIndexEntry() { + ColumnWriter::createRowIndexEntry(); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->createRowIndexEntry(); + } + } + + void StructColumnWriter::reset() { + ColumnWriter::reset(); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->reset(); + } + } + + void 
StructColumnWriter::writeDictionary() { + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->writeDictionary(); + } + } + + class IntegerColumnWriter : public ColumnWriter { + public: + IntegerColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + protected: + std::unique_ptr<RleEncoder> rleEncoder; + + private: + RleVersion rleVersion; + }; + + IntegerColumnWriter::IntegerColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()) { + std::unique_ptr<BufferedOutputStream> dataStream = + factory.createStream(proto::Stream_Kind_DATA); + rleEncoder = createRleEncoder( + std::move(dataStream), + true, + rleVersion, + memPool, + options.getAlignedBitpacking()); + + if (enableIndex) { + recordPosition(); + } + } + + void IntegerColumnWriter::add( + ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const LongVectorBatch* longBatch = + dynamic_cast<const LongVectorBatch*>(&rowBatch); + if (longBatch == nullptr) { + throw InvalidArgument("Failed to cast to LongVectorBatch"); + } + IntegerColumnStatisticsImpl* intStats = + dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get()); + if (intStats == nullptr) { + throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const int64_t* data = longBatch->data.data() + offset; + const char* notNull = longBatch->hasNulls ? 
+ longBatch->notNull.data() + offset : nullptr; + + rleEncoder->add(data, numValues, notNull); + + // update stats + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull == nullptr || notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(data[i]); + } + intStats->update(data[i], 1); + } + } + intStats->increase(count); + if (count < numValues) { + intStats->setHasNull(true); + } + } + + void IntegerColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_DATA); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(rleEncoder->flush()); + streams.push_back(stream); + } + + uint64_t IntegerColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += rleEncoder->getBufferSize(); + return size; + } + + void IntegerColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void IntegerColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + rleEncoder->recordPosition(rowIndexPosition.get()); + } + + class ByteColumnWriter : public ColumnWriter { + public: + ByteColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + private: + std::unique_ptr<ByteRleEncoder> byteRleEncoder; + }; + + ByteColumnWriter::ByteColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options) { + std::unique_ptr<BufferedOutputStream> dataStream = + factory.createStream(proto::Stream_Kind_DATA); + byteRleEncoder = createByteRleEncoder(std::move(dataStream)); + + if (enableIndex) { + recordPosition(); + } + } + + void ByteColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch); + if (byteBatch == nullptr) { + throw InvalidArgument("Failed to cast to LongVectorBatch"); + } + IntegerColumnStatisticsImpl* intStats = + dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get()); + if (intStats == nullptr) { + throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + int64_t* data = byteBatch->data.data() + offset; + const char* notNull = byteBatch->hasNulls ? 
+ byteBatch->notNull.data() + offset : nullptr; + + char* byteData = reinterpret_cast<char*>(data); + for (uint64_t i = 0; i < numValues; ++i) { + byteData[i] = static_cast<char>(data[i]); + } + byteRleEncoder->add(byteData, numValues, notNull); + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull == nullptr || notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(data[i]); + } + intStats->update(static_cast<int64_t>(byteData[i]), 1); + } + } + intStats->increase(count); + if (count < numValues) { + intStats->setHasNull(true); + } + } + + void ByteColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_DATA); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(byteRleEncoder->flush()); + streams.push_back(stream); + } + + uint64_t ByteColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += byteRleEncoder->getBufferSize(); + return size; + } + + void ByteColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void ByteColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + byteRleEncoder->recordPosition(rowIndexPosition.get()); + } + + class BooleanColumnWriter : public ColumnWriter { + public: + BooleanColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + private: + std::unique_ptr<ByteRleEncoder> rleEncoder; + }; + + BooleanColumnWriter::BooleanColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options) { + std::unique_ptr<BufferedOutputStream> dataStream = + factory.createStream(proto::Stream_Kind_DATA); + rleEncoder = createBooleanRleEncoder(std::move(dataStream)); + + if (enableIndex) { + recordPosition(); + } + } + + void BooleanColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch); + if (byteBatch == nullptr) { + throw InvalidArgument("Failed to cast to LongVectorBatch"); + } + BooleanColumnStatisticsImpl* boolStats = + dynamic_cast<BooleanColumnStatisticsImpl*>(colIndexStatistics.get()); + if (boolStats == nullptr) { + throw InvalidArgument("Failed to cast to BooleanColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + int64_t* data = byteBatch->data.data() + offset; + const char* notNull = byteBatch->hasNulls ? 
+ byteBatch->notNull.data() + offset : nullptr; + + char* byteData = reinterpret_cast<char*>(data); + for (uint64_t i = 0; i < numValues; ++i) { + byteData[i] = static_cast<char>(data[i]); + } + rleEncoder->add(byteData, numValues, notNull); + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull == nullptr || notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(data[i]); + } + boolStats->update(byteData[i] != 0, 1); + } + } + boolStats->increase(count); + if (count < numValues) { + boolStats->setHasNull(true); + } + } + + void BooleanColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_DATA); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(rleEncoder->flush()); + streams.push_back(stream); + } + + uint64_t BooleanColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += rleEncoder->getBufferSize(); + return size; + } + + void BooleanColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void BooleanColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + rleEncoder->recordPosition(rowIndexPosition.get()); + } + + class DoubleColumnWriter : public ColumnWriter { + public: + DoubleColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options, + bool isFloat); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + private: + bool isFloat; + std::unique_ptr<AppendOnlyBufferedStream> dataStream; + DataBuffer<char> buffer; + }; + + DoubleColumnWriter::DoubleColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options, + bool isFloatType) : + ColumnWriter(type, factory, options), + isFloat(isFloatType), + buffer(*options.getMemoryPool()) { + dataStream.reset(new AppendOnlyBufferedStream( + factory.createStream(proto::Stream_Kind_DATA))); + buffer.resize(isFloat ? 4 : 8); + + if (enableIndex) { + recordPosition(); + } + } + + // Floating point types are stored using IEEE 754 floating point bit layout. + // Float columns use 4 bytes per value and double columns use 8 bytes. 
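  // As an illustrative example of the little-endian layout produced below
  // (a sketch, not part of the encoder itself): the double 1.0 has the
  // IEEE 754 bit pattern 0x3FF0000000000000, so encodeFloatNum emits the
  // eight bytes 00 00 00 00 00 00 F0 3F, least-significant byte first;
  // the float 1.0f (bit pattern 0x3F800000) is emitted as 00 00 80 3F.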
+ template <typename FLOAT_TYPE, typename INTEGER_TYPE> + inline void encodeFloatNum(FLOAT_TYPE input, char* output) { + INTEGER_TYPE* intBits = reinterpret_cast<INTEGER_TYPE*>(&input); + for (size_t i = 0; i < sizeof(INTEGER_TYPE); ++i) { + output[i] = static_cast<char>(((*intBits) >> (8 * i)) & 0xff); + } + } + + void DoubleColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const DoubleVectorBatch* dblBatch = + dynamic_cast<const DoubleVectorBatch*>(&rowBatch); + if (dblBatch == nullptr) { + throw InvalidArgument("Failed to cast to DoubleVectorBatch"); + } + DoubleColumnStatisticsImpl* doubleStats = + dynamic_cast<DoubleColumnStatisticsImpl*>(colIndexStatistics.get()); + if (doubleStats == nullptr) { + throw InvalidArgument("Failed to cast to DoubleColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const double* doubleData = dblBatch->data.data() + offset; + const char* notNull = dblBatch->hasNulls ? + dblBatch->notNull.data() + offset : nullptr; + + size_t bytes = isFloat ? 4 : 8; + char* data = buffer.data(); + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + if (isFloat) { + encodeFloatNum<float, int32_t>(static_cast<float>(doubleData[i]), data); + } else { + encodeFloatNum<double, int64_t>(doubleData[i], data); + } + dataStream->write(data, bytes); + ++count; + if (enableBloomFilter) { + bloomFilter->addDouble(doubleData[i]); + } + doubleStats->update(doubleData[i]); + } + } + doubleStats->increase(count); + if (count < numValues) { + doubleStats->setHasNull(true); + } + } + + void DoubleColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_DATA); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(dataStream->flush()); + streams.push_back(stream); + } + + uint64_t DoubleColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += dataStream->getSize(); + return size; + } + + void DoubleColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void DoubleColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + dataStream->recordPosition(rowIndexPosition.get()); + } + + /** + * Implementation of increasing sorted string dictionary + */ + class SortedStringDictionary { + public: + struct DictEntry { + DictEntry(const char * str, size_t len):data(str),length(len) {} + const char * data; + size_t length; + }; + + SortedStringDictionary():totalLength(0) {} + + // insert a new string into dictionary, return its insertion order + size_t insert(const char * data, size_t len); + + // write dictionary data & length to output buffer + void flush(AppendOnlyBufferedStream * dataStream, + RleEncoder * lengthEncoder) const; + + // reorder input index buffer from insertion order to dictionary order + void reorder(std::vector<int64_t>& idxBuffer) const; + + // get dict entries in insertion order + void getEntriesInInsertionOrder(std::vector<const DictEntry *>&) const; + + // return count of entries + size_t size() const; + + // return total length of strings in the dictioanry + 
uint64_t length() const; + + void clear(); + + private: + struct LessThan { + bool operator()(const DictEntry& left, const DictEntry& right) const { + int ret = memcmp(left.data, right.data, std::min(left.length, right.length)); + if (ret != 0) { + return ret < 0; + } + return left.length < right.length; + } + }; + + std::map<DictEntry, size_t, LessThan> dict; + std::vector<std::vector<char>> data; + uint64_t totalLength; + + // use friend class here to avoid being bothered by const function calls + friend class StringColumnWriter; + friend class CharColumnWriter; + friend class VarCharColumnWriter; + // store indexes of insertion order in the dictionary for not-null rows + std::vector<int64_t> idxInDictBuffer; + }; + + // insert a new string into dictionary, return its insertion order + size_t SortedStringDictionary::insert(const char * str, size_t len) { + auto ret = dict.insert({DictEntry(str, len), dict.size()}); + if (ret.second) { + // make a copy to internal storage + data.push_back(std::vector<char>(len)); + memcpy(data.back().data(), str, len); + // update dictionary entry to link pointer to internal storage + DictEntry * entry = const_cast<DictEntry *>(&(ret.first->first)); + entry->data = data.back().data(); + totalLength += len; + } + return ret.first->second; + } + + // write dictionary data & length to output buffer + void SortedStringDictionary::flush(AppendOnlyBufferedStream * dataStream, + RleEncoder * lengthEncoder) const { + for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + dataStream->write(it->first.data, it->first.length); + lengthEncoder->write(static_cast<int64_t>(it->first.length)); + } + } + + /** + * Reorder input index buffer from insertion order to dictionary order + * + * We require this function because string values are buffered by indexes + * in their insertion order. Until the entire dictionary is complete can + * we get their sorted indexes in the dictionary in that ORC specification + * demands dictionary should be ordered. Therefore this function transforms + * the indexes from insertion order to dictionary value order for final + * output. 
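   *
   * As an illustrative example: if the values "pear", "apple" and "orange"
   * are inserted in that order, their insertion-order ids are 0, 1 and 2,
   * while their positions in the sorted dictionary are 2, 0 and 1
   * ("apple" < "orange" < "pear"). reorder() therefore rewrites every
   * buffered id through the mapping 0 -> 2, 1 -> 0, 2 -> 1.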
+ */ + void SortedStringDictionary::reorder(std::vector<int64_t>& idxBuffer) const { + // iterate the dictionary to get mapping from insertion order to value order + std::vector<size_t> mapping(dict.size()); + size_t dictIdx = 0; + for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + mapping[it->second] = dictIdx++; + } + + // do the transformation + for (size_t i = 0; i != idxBuffer.size(); ++i) { + idxBuffer[i] = static_cast<int64_t>( + mapping[static_cast<size_t>(idxBuffer[i])]); + } + } + + // get dict entries in insertion order + void SortedStringDictionary::getEntriesInInsertionOrder( + std::vector<const DictEntry *>& entries) const { + entries.resize(dict.size()); + for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + entries[it->second] = &(it->first); + } + } + + // return count of entries + size_t SortedStringDictionary::size() const { + return dict.size(); + } + + // return total length of strings in the dictioanry + uint64_t SortedStringDictionary::length() const { + return totalLength; + } + + void SortedStringDictionary::clear() { + totalLength = 0; + data.clear(); + dict.clear(); + } + + class StringColumnWriter : public ColumnWriter { + public: + StringColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + virtual void createRowIndexEntry() override; + + virtual void writeDictionary() override; + + virtual void reset() override; + + private: + /** + * dictionary related functions + */ + bool checkDictionaryKeyRatio(); + void createDirectStreams(); + void createDictStreams(); + void deleteDictStreams(); + void fallbackToDirectEncoding(); + + protected: + RleVersion rleVersion; + bool useCompression; + const StreamsFactory& streamsFactory; + bool alignedBitPacking; + + // direct encoding streams + std::unique_ptr<RleEncoder> directLengthEncoder; + std::unique_ptr<AppendOnlyBufferedStream> directDataStream; + + // dictionary encoding streams + std::unique_ptr<RleEncoder> dictDataEncoder; + std::unique_ptr<RleEncoder> dictLengthEncoder; + std::unique_ptr<AppendOnlyBufferedStream> dictStream; + + /** + * dictionary related variables + */ + SortedStringDictionary dictionary; + // whether or not dictionary checking is done + bool doneDictionaryCheck; + // whether or not it should be used + bool useDictionary; + // keys in the dictionary should not exceed this ratio + double dictSizeThreshold; + + // record start row of each row group; null rows are skipped + mutable std::vector<size_t> startOfRowGroups; + }; + + StringColumnWriter::StringColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()), + useCompression(options.getCompression() != CompressionKind_NONE), + streamsFactory(factory), + alignedBitPacking(options.getAlignedBitpacking()), + doneDictionaryCheck(false), + useDictionary(options.getEnableDictionary()), + dictSizeThreshold(options.getDictionaryKeySizeThreshold()){ + if (type.getKind() == TypeKind::BINARY) { + useDictionary = false; + doneDictionaryCheck = true; + } + + if (useDictionary) { + 
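      // Dictionary encoding starts optimistically: string values are buffered
      // as indexes into the sorted dictionary until checkDictionaryKeyRatio()
      // compares the number of distinct keys with dictSizeThreshold (at the
      // first row index entry, or when the first stripe is written if indexes
      // are disabled). If the ratio is exceeded, e.g. more than 80% distinct
      // keys for a threshold of 0.8, the writer falls back to direct encoding
      // via fallbackToDirectEncoding().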
createDictStreams(); + } else { + doneDictionaryCheck = true; + createDirectStreams(); + } + + if (enableIndex) { + recordPosition(); + } + } + + void StringColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const StringVectorBatch* stringBatch = + dynamic_cast<const StringVectorBatch*>(&rowBatch); + if (stringBatch == nullptr) { + throw InvalidArgument("Failed to cast to StringVectorBatch"); + } + + StringColumnStatisticsImpl* strStats = + dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); + if (strStats == nullptr) { + throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + char *const * data = stringBatch->data.data() + offset; + const int64_t* length = stringBatch->length.data() + offset; + const char* notNull = stringBatch->hasNulls ? + stringBatch->notNull.data() + offset : nullptr; + + if (!useDictionary){ + directLengthEncoder->add(length, numValues, notNull); + } + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + const size_t len = static_cast<size_t>(length[i]); + if (useDictionary) { + size_t index = dictionary.insert(data[i], len); + dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); + } else { + directDataStream->write(data[i], len); + } + if (enableBloomFilter) { + bloomFilter->addBytes(data[i], static_cast<int64_t>(len)); + } + strStats->update(data[i], len); + ++count; + } + } + strStats->increase(count); + if (count < numValues) { + strStats->setHasNull(true); + } + } + + void StringColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + if (useDictionary) { + proto::Stream data; + data.set_kind(proto::Stream_Kind_DATA); + data.set_column(static_cast<uint32_t>(columnId)); + data.set_length(dictDataEncoder->flush()); + streams.push_back(data); + + proto::Stream dict; + dict.set_kind(proto::Stream_Kind_DICTIONARY_DATA); + dict.set_column(static_cast<uint32_t>(columnId)); + dict.set_length(dictStream->flush()); + streams.push_back(dict); + + proto::Stream length; + length.set_kind(proto::Stream_Kind_LENGTH); + length.set_column(static_cast<uint32_t>(columnId)); + length.set_length(dictLengthEncoder->flush()); + streams.push_back(length); + } else { + proto::Stream length; + length.set_kind(proto::Stream_Kind_LENGTH); + length.set_column(static_cast<uint32_t>(columnId)); + length.set_length(directLengthEncoder->flush()); + streams.push_back(length); + + proto::Stream data; + data.set_kind(proto::Stream_Kind_DATA); + data.set_column(static_cast<uint32_t>(columnId)); + data.set_length(directDataStream->flush()); + streams.push_back(data); + } + } + + uint64_t StringColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + if (!useDictionary) { + size += directLengthEncoder->getBufferSize(); + size += directDataStream->getSize(); + } else { + size += dictionary.length(); + size += dictionary.size() * sizeof(int32_t); + size += dictionary.idxInDictBuffer.size() * sizeof(int32_t); + if (useCompression) { + size /= 3; // estimated ratio is 3:1 + } + } + return size; + } + + void StringColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + if (!useDictionary) { + encoding.set_kind(rleVersion == RleVersion_1 ? 
+ proto::ColumnEncoding_Kind_DIRECT : + proto::ColumnEncoding_Kind_DIRECT_V2); + } else { + encoding.set_kind(rleVersion == RleVersion_1 ? + proto::ColumnEncoding_Kind_DICTIONARY : + proto::ColumnEncoding_Kind_DICTIONARY_V2); + } + encoding.set_dictionarysize(static_cast<uint32_t>(dictionary.size())); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void StringColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + if (!useDictionary) { + directDataStream->recordPosition(rowIndexPosition.get()); + directLengthEncoder->recordPosition(rowIndexPosition.get()); + } else { + if (enableIndex) { + startOfRowGroups.push_back(dictionary.idxInDictBuffer.size()); + } + } + } + + bool StringColumnWriter::checkDictionaryKeyRatio() { + if (!doneDictionaryCheck) { + useDictionary = dictionary.size() <= static_cast<size_t>( + static_cast<double>(dictionary.idxInDictBuffer.size()) * dictSizeThreshold); + doneDictionaryCheck = true; + } + + return useDictionary; + } + + void StringColumnWriter::createRowIndexEntry() { + if (useDictionary && !doneDictionaryCheck) { + if (!checkDictionaryKeyRatio()) { + fallbackToDirectEncoding(); + } + } + ColumnWriter::createRowIndexEntry(); + } + + void StringColumnWriter::reset() { + ColumnWriter::reset(); + + dictionary.clear(); + dictionary.idxInDictBuffer.resize(0); + startOfRowGroups.clear(); + startOfRowGroups.push_back(0); + } + + void StringColumnWriter::createDirectStreams() { + std::unique_ptr<BufferedOutputStream> directLengthStream = + streamsFactory.createStream(proto::Stream_Kind_LENGTH); + directLengthEncoder = createRleEncoder(std::move(directLengthStream), + false, + rleVersion, + memPool, + alignedBitPacking); + directDataStream.reset(new AppendOnlyBufferedStream( + streamsFactory.createStream(proto::Stream_Kind_DATA))); + } + + void StringColumnWriter::createDictStreams() { + std::unique_ptr<BufferedOutputStream> dictDataStream = + streamsFactory.createStream(proto::Stream_Kind_DATA); + dictDataEncoder = createRleEncoder(std::move(dictDataStream), + false, + rleVersion, + memPool, + alignedBitPacking); + std::unique_ptr<BufferedOutputStream> dictLengthStream = + streamsFactory.createStream(proto::Stream_Kind_LENGTH); + dictLengthEncoder = createRleEncoder(std::move(dictLengthStream), + false, + rleVersion, + memPool, + alignedBitPacking); + dictStream.reset(new AppendOnlyBufferedStream( + streamsFactory.createStream(proto::Stream_Kind_DICTIONARY_DATA))); + } + + void StringColumnWriter::deleteDictStreams() { + dictDataEncoder.reset(nullptr); + dictLengthEncoder.reset(nullptr); + dictStream.reset(nullptr); + + dictionary.clear(); + dictionary.idxInDictBuffer.clear(); + startOfRowGroups.clear(); + } + + void StringColumnWriter::writeDictionary() { + if (useDictionary && !doneDictionaryCheck) { + // when index is disabled, dictionary check happens while writing 1st stripe + if (!checkDictionaryKeyRatio()) { + fallbackToDirectEncoding(); + return; + } + } + + if (useDictionary) { + // flush dictionary data & length streams + dictionary.flush(dictStream.get(), dictLengthEncoder.get()); + + // convert index from insertion order to dictionary order + dictionary.reorder(dictionary.idxInDictBuffer); + + // write data sequences + int64_t * data = dictionary.idxInDictBuffer.data(); + if (enableIndex) { + size_t prevOffset = 0; + for (size_t i = 0; i < startOfRowGroups.size(); ++i) { + // write sequences in batch for a row group stride + size_t offset = 
startOfRowGroups[i]; + dictDataEncoder->add(data + prevOffset, offset - prevOffset, nullptr); + + // update index positions + int rowGroupId = static_cast<int>(i); + proto::RowIndexEntry* indexEntry = + (rowGroupId < rowIndex->entry_size()) ? + rowIndex->mutable_entry(rowGroupId) : rowIndexEntry.get(); + + // add positions for direct streams + RowIndexPositionRecorder recorder(*indexEntry); + dictDataEncoder->recordPosition(&recorder); + + prevOffset = offset; + } + + dictDataEncoder->add(data + prevOffset, + dictionary.idxInDictBuffer.size() - prevOffset, + nullptr); + } else { + dictDataEncoder->add(data, dictionary.idxInDictBuffer.size(), nullptr); + } + } + } + + void StringColumnWriter::fallbackToDirectEncoding() { + createDirectStreams(); + + if (enableIndex) { + // fallback happens at the 1st row group; + // simply complete positions for direct streams + proto::RowIndexEntry * indexEntry = rowIndexEntry.get(); + RowIndexPositionRecorder recorder(*indexEntry); + directDataStream->recordPosition(&recorder); + directLengthEncoder->recordPosition(&recorder); + } + + // get dictionary entries in insertion order + std::vector<const SortedStringDictionary::DictEntry *> entries; + dictionary.getEntriesInInsertionOrder(entries); + + // store each length of the data into a vector + const SortedStringDictionary::DictEntry * dictEntry = nullptr; + for (uint64_t i = 0; i != dictionary.idxInDictBuffer.size(); ++i) { + // write one row data in direct encoding + dictEntry = entries[static_cast<size_t>(dictionary.idxInDictBuffer[i])]; + directDataStream->write(dictEntry->data, dictEntry->length); + directLengthEncoder->write(static_cast<int64_t>(dictEntry->length)); + } + + deleteDictStreams(); + } + + struct Utf8Utils { + /** + * Counts how many utf-8 chars of the input data + */ + static uint64_t charLength(const char * data, uint64_t length) { + uint64_t chars = 0; + for (uint64_t i = 0; i < length; i++) { + if (isUtfStartByte(data[i])) { + chars++; + } + } + return chars; + } + + /** + * Return the number of bytes required to read at most maxCharLength + * characters in full from a utf-8 encoded byte array provided + * by data. This does not validate utf-8 data, but + * operates correctly on already valid utf-8 data. + * + * @param maxCharLength number of characters required + * @param data the bytes of UTF-8 + * @param length the length of data to truncate + */ + static uint64_t truncateBytesTo(uint64_t maxCharLength, + const char * data, + uint64_t length) { + uint64_t chars = 0; + if (length <= maxCharLength) { + return length; + } + for (uint64_t i = 0; i < length; i++) { + if (isUtfStartByte(data[i])) { + chars++; + } + if (chars > maxCharLength) { + return i; + } + } + // everything fits + return length; + } + + /** + * Checks if b is the first byte of a UTF-8 character. + */ + inline static bool isUtfStartByte(char b) { + return (b & 0xC0) != 0x80; + } + + /** + * Find the start of the last character that ends in the current string. 
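     *
     * As an illustrative example: for the bytes "ab\xC3\xA9" ("ab" followed
     * by the two-byte character U+00E9), findLastCharacter(text, 0, 3) sees
     * that byte 3 (0xA9) is a continuation byte and returns 2, the index of
     * the lead byte 0xC3.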
+ * @param text the bytes of the utf-8 + * @param from the first byte location + * @param until the last byte location + * @return the index of the last character + */ + static uint64_t findLastCharacter(const char * text, uint64_t from, uint64_t until) { + uint64_t posn = until; + /* we don't expect characters more than 5 bytes */ + while (posn >= from) { + if (isUtfStartByte(text[posn])) { + return posn; + } + posn -= 1; + } + /* beginning of a valid char not found */ + throw std::logic_error( + "Could not truncate string, beginning of a valid char not found"); + } + }; + + class CharColumnWriter : public StringColumnWriter { + public: + CharColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + StringColumnWriter(type, factory, options), + maxLength(type.getMaximumLength()), + padBuffer(*options.getMemoryPool()) { + // utf-8 is currently 4 bytes long, but it could be up to 6 + padBuffer.resize(maxLength * 6); + } + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + private: + uint64_t maxLength; + DataBuffer<char> padBuffer; + }; + + void CharColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); + if (charsBatch == nullptr) { + throw InvalidArgument("Failed to cast to StringVectorBatch"); + } + + StringColumnStatisticsImpl* strStats = + dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); + if (strStats == nullptr) { + throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + char** data = charsBatch->data.data() + offset; + int64_t* length = charsBatch->length.data() + offset; + const char* notNull = charsBatch->hasNulls ? 
+ charsBatch->notNull.data() + offset : nullptr; + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + const char * charData = nullptr; + uint64_t originLength = static_cast<uint64_t>(length[i]); + uint64_t charLength = Utf8Utils::charLength(data[i], originLength); + if (charLength >= maxLength) { + charData = data[i]; + length[i] = static_cast<int64_t>( + Utf8Utils::truncateBytesTo(maxLength, data[i], originLength)); + } else { + charData = padBuffer.data(); + // the padding is exactly 1 byte per char + length[i] = length[i] + static_cast<int64_t>(maxLength - charLength); + memcpy(padBuffer.data(), data[i], originLength); + memset(padBuffer.data() + originLength, + ' ', + static_cast<size_t>(length[i]) - originLength); + } + + if (useDictionary) { + size_t index = dictionary.insert(charData, static_cast<size_t>(length[i])); + dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); + } else { + directDataStream->write(charData, static_cast<size_t>(length[i])); + } + + if (enableBloomFilter) { + bloomFilter->addBytes(data[i], length[i]); + } + strStats->update(charData, static_cast<size_t>(length[i])); + ++count; + } + } + + if (!useDictionary) { + directLengthEncoder->add(length, numValues, notNull); + } + + strStats->increase(count); + if (count < numValues) { + strStats->setHasNull(true); + } + } + + class VarCharColumnWriter : public StringColumnWriter { + public: + VarCharColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + StringColumnWriter(type, factory, options), + maxLength(type.getMaximumLength()) { + // PASS + } + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + private: + uint64_t maxLength; + }; + + void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + StringVectorBatch* charsBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); + if (charsBatch == nullptr) { + throw InvalidArgument("Failed to cast to StringVectorBatch"); + } + + StringColumnStatisticsImpl* strStats = + dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get()); + if (strStats == nullptr) { + throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + char* const* data = charsBatch->data.data() + offset; + int64_t* length = charsBatch->length.data() + offset; + const char* notNull = charsBatch->hasNulls ? 
+ charsBatch->notNull.data() + offset : nullptr; + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + uint64_t itemLength = Utf8Utils::truncateBytesTo( + maxLength, data[i], static_cast<uint64_t>(length[i])); + length[i] = static_cast<int64_t>(itemLength); + + if (useDictionary) { + size_t index = dictionary.insert(data[i], static_cast<size_t>(length[i])); + dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index)); + } else { + directDataStream->write(data[i], static_cast<size_t>(length[i])); + } + + if (enableBloomFilter) { + bloomFilter->addBytes(data[i], length[i]); + } + strStats->update(data[i], static_cast<size_t>(length[i])); + ++count; + } + } + + if (!useDictionary) { + directLengthEncoder->add(length, numValues, notNull); + } + + strStats->increase(count); + if (count < numValues) { + strStats->setHasNull(true); + } + } + + class BinaryColumnWriter : public StringColumnWriter { + public: + BinaryColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + StringColumnWriter(type, factory, options) { + // PASS + } + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + }; + + void BinaryColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + StringVectorBatch* binBatch = dynamic_cast<StringVectorBatch*>(&rowBatch); + if (binBatch == nullptr) { + throw InvalidArgument("Failed to cast to StringVectorBatch"); + } + + BinaryColumnStatisticsImpl* binStats = + dynamic_cast<BinaryColumnStatisticsImpl*>(colIndexStatistics.get()); + if (binStats == nullptr) { + throw InvalidArgument("Failed to cast to BinaryColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + char** data = binBatch->data.data() + offset; + int64_t* length = binBatch->length.data() + offset; + const char* notNull = binBatch->hasNulls ? 
+ binBatch->notNull.data() + offset : nullptr; + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + uint64_t unsignedLength = static_cast<uint64_t>(length[i]); + if (!notNull || notNull[i]) { + directDataStream->write(data[i], unsignedLength); + + binStats->update(unsignedLength); + ++count; + } + } + directLengthEncoder->add(length, numValues, notNull); + binStats->increase(count); + if (count < numValues) { + binStats->setHasNull(true); + } + } + + class TimestampColumnWriter : public ColumnWriter { + public: + TimestampColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + protected: + std::unique_ptr<RleEncoder> secRleEncoder, nanoRleEncoder; + + private: + RleVersion rleVersion; + const Timezone& timezone; + }; + + TimestampColumnWriter::TimestampColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()), + timezone(getTimezoneByName("GMT")){ + std::unique_ptr<BufferedOutputStream> dataStream = + factory.createStream(proto::Stream_Kind_DATA); + std::unique_ptr<BufferedOutputStream> secondaryStream = + factory.createStream(proto::Stream_Kind_SECONDARY); + secRleEncoder = createRleEncoder(std::move(dataStream), + true, + rleVersion, + memPool, + options.getAlignedBitpacking()); + nanoRleEncoder = createRleEncoder(std::move(secondaryStream), + false, + rleVersion, + memPool, + options.getAlignedBitpacking()); + + if (enableIndex) { + recordPosition(); + } + } + + // Because the number of nanoseconds often has a large number of trailing zeros, + // the number has trailing decimal zero digits removed and the last three bits + // are used to record how many zeros were removed if the trailing zeros are + // more than 2. Thus 1000 nanoseconds would be serialized as 0x0a and + // 100000 would be serialized as 0x0c. + static int64_t formatNano(int64_t nanos) { + if (nanos == 0) { + return 0; + } else if (nanos % 100 != 0) { + return (nanos) << 3; + } else { + nanos /= 100; + int64_t trailingZeros = 1; + while (nanos % 10 == 0 && trailingZeros < 7) { + nanos /= 10; + trailingZeros += 1; + } + return (nanos) << 3 | trailingZeros; + } + } + + void TimestampColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + TimestampVectorBatch* tsBatch = + dynamic_cast<TimestampVectorBatch*>(&rowBatch); + if (tsBatch == nullptr) { + throw InvalidArgument("Failed to cast to TimestampVectorBatch"); + } + + TimestampColumnStatisticsImpl* tsStats = + dynamic_cast<TimestampColumnStatisticsImpl*>(colIndexStatistics.get()); + if (tsStats == nullptr) { + throw InvalidArgument("Failed to cast to TimestampColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const char* notNull = tsBatch->hasNulls ? 
+ tsBatch->notNull.data() + offset : nullptr; + int64_t *secs = tsBatch->data.data() + offset; + int64_t *nanos = tsBatch->nanoseconds.data() + offset; + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull == nullptr || notNull[i]) { + // TimestampVectorBatch already stores data in UTC + int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000; + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(millsUTC); + } + tsStats->update(millsUTC); + if (secs[i] < 0 && nanos[i] > 999999) { - secs[i] += 1; - } - - secs[i] -= timezone.getEpoch(); - nanos[i] = formatNano(nanos[i]); - } - } - tsStats->increase(count); - if (count < numValues) { - tsStats->setHasNull(true); - } - - secRleEncoder->add(secs, numValues, notNull); - nanoRleEncoder->add(nanos, numValues, notNull); - } - - void TimestampColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream dataStream; - dataStream.set_kind(proto::Stream_Kind_DATA); - dataStream.set_column(static_cast<uint32_t>(columnId)); - dataStream.set_length(secRleEncoder->flush()); - streams.push_back(dataStream); - - proto::Stream secondaryStream; - secondaryStream.set_kind(proto::Stream_Kind_SECONDARY); - secondaryStream.set_column(static_cast<uint32_t>(columnId)); - secondaryStream.set_length(nanoRleEncoder->flush()); - streams.push_back(secondaryStream); - } - - uint64_t TimestampColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += secRleEncoder->getBufferSize(); - size += nanoRleEncoder->getBufferSize(); - return size; - } - - void TimestampColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void TimestampColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - secRleEncoder->recordPosition(rowIndexPosition.get()); - nanoRleEncoder->recordPosition(rowIndexPosition.get()); - } - - class DateColumnWriter : public IntegerColumnWriter { - public: - DateColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - }; - - DateColumnWriter::DateColumnWriter( - const Type &type, - const StreamsFactory &factory, - const WriterOptions &options) : - IntegerColumnWriter(type, factory, options) { - // PASS - } - - void DateColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const LongVectorBatch* longBatch = - dynamic_cast<const LongVectorBatch*>(&rowBatch); - if (longBatch == nullptr) { - throw InvalidArgument("Failed to cast to LongVectorBatch"); - } - - DateColumnStatisticsImpl* dateStats = - dynamic_cast<DateColumnStatisticsImpl*>(colIndexStatistics.get()); - if (dateStats == nullptr) { - throw InvalidArgument("Failed to cast to DateColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const int64_t* data = longBatch->data.data() + offset; - const char* notNull = longBatch->hasNulls ? 
- longBatch->notNull.data() + offset : nullptr; - - rleEncoder->add(data, numValues, notNull); - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - ++count; - dateStats->update(static_cast<int32_t>(data[i])); - if (enableBloomFilter) { - bloomFilter->addLong(data[i]); - } - } - } - dateStats->increase(count); - if (count < numValues) { - dateStats->setHasNull(true); - } - } - - class Decimal64ColumnWriter : public ColumnWriter { - public: - static const uint32_t MAX_PRECISION_64 = 18; - static const uint32_t MAX_PRECISION_128 = 38; - - Decimal64ColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void recordPosition() const override; - - protected: - RleVersion rleVersion; - uint64_t precision; - uint64_t scale; - std::unique_ptr<AppendOnlyBufferedStream> valueStream; - std::unique_ptr<RleEncoder> scaleEncoder; - - private: - char buffer[10]; - }; - - Decimal64ColumnWriter::Decimal64ColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()), - precision(type.getPrecision()), - scale(type.getScale()) { - valueStream.reset(new AppendOnlyBufferedStream( - factory.createStream(proto::Stream_Kind_DATA))); - std::unique_ptr<BufferedOutputStream> scaleStream = - factory.createStream(proto::Stream_Kind_SECONDARY); - scaleEncoder = createRleEncoder(std::move(scaleStream), - true, - rleVersion, - memPool, - options.getAlignedBitpacking()); - - if (enableIndex) { - recordPosition(); - } - } - - void Decimal64ColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const Decimal64VectorBatch* decBatch = - dynamic_cast<const Decimal64VectorBatch*>(&rowBatch); - if (decBatch == nullptr) { - throw InvalidArgument("Failed to cast to Decimal64VectorBatch"); - } - - DecimalColumnStatisticsImpl* decStats = - dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get()); - if (decStats == nullptr) { - throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const char* notNull = decBatch->hasNulls ? 
- decBatch->notNull.data() + offset : nullptr; - const int64_t* values = decBatch->values.data() + offset; - - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - int64_t val = zigZag(values[i]); - char* data = buffer; - while (true) { - if ((val & ~0x7f) == 0) { - *(data++) = (static_cast<char>(val)); - break; - } else { - *(data++) = static_cast<char>(0x80 | (val & 0x7f)); - // cast val to unsigned so as to force 0-fill right shift - val = (static_cast<uint64_t>(val) >> 7); - } - } - valueStream->write(buffer, static_cast<size_t>(data - buffer)); - ++count; - if (enableBloomFilter) { - std::string decimal = Decimal( - values[i], static_cast<int32_t>(scale)).toString(); - bloomFilter->addBytes( - decimal.c_str(), static_cast<int64_t>(decimal.size())); - } - decStats->update(Decimal(values[i], static_cast<int32_t>(scale))); - } - } - decStats->increase(count); - if (count < numValues) { - decStats->setHasNull(true); - } - std::vector<int64_t> scales(numValues, static_cast<int64_t>(scale)); - scaleEncoder->add(scales.data(), numValues, notNull); - } - - void Decimal64ColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream dataStream; - dataStream.set_kind(proto::Stream_Kind_DATA); - dataStream.set_column(static_cast<uint32_t>(columnId)); - dataStream.set_length(valueStream->flush()); - streams.push_back(dataStream); - - proto::Stream secondaryStream; - secondaryStream.set_kind(proto::Stream_Kind_SECONDARY); - secondaryStream.set_column(static_cast<uint32_t>(columnId)); - secondaryStream.set_length(scaleEncoder->flush()); - streams.push_back(secondaryStream); - } - - uint64_t Decimal64ColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += valueStream->getSize(); - size += scaleEncoder->getBufferSize(); - return size; - } - - void Decimal64ColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - } - - void Decimal64ColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - valueStream->recordPosition(rowIndexPosition.get()); - scaleEncoder->recordPosition(rowIndexPosition.get()); - } - - class Decimal128ColumnWriter : public Decimal64ColumnWriter { - public: - Decimal128ColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - private: - char buffer[20]; - }; - - Decimal128ColumnWriter::Decimal128ColumnWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - Decimal64ColumnWriter(type, factory, options) { - // PASS - } - - // Zigzag encoding moves the sign bit to the least significant bit using the - // expression (val « 1) ^ (val » 63) and derives its name from the fact that - // positive and negative numbers alternate once encoded. 
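  // As an illustrative example: zigzag maps 0 -> 0, -1 -> 1, 1 -> 2,
  // -2 -> 3, 2 -> 4, and so on, so values of small magnitude stay small when
  // the result is then written as a base-128 varint (7 payload bits per byte,
  // high bit set on every byte except the last; e.g. 300 is written as the
  // two bytes 0xAC 0x02).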
- Int128 zigZagInt128(const Int128& value) { - bool isNegative = value < 0; - Int128 val = value.abs(); - val <<= 1; - if (isNegative) { - val -= 1; - } - return val; - } - - void Decimal128ColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - const Decimal128VectorBatch* decBatch = - dynamic_cast<const Decimal128VectorBatch*>(&rowBatch); - if (decBatch == nullptr) { - throw InvalidArgument("Failed to cast to Decimal128VectorBatch"); - } - - DecimalColumnStatisticsImpl* decStats = - dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get()); - if (decStats == nullptr) { - throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const char* notNull = decBatch->hasNulls ? - decBatch->notNull.data() + offset : nullptr; - const Int128* values = decBatch->values.data() + offset; - - // The current encoding of decimal columns stores the integer representation - // of the value as an unbounded length zigzag encoded base 128 varint. - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - Int128 val = zigZagInt128(values[i]); - char* data = buffer; - while (true) { - if ((val & ~0x7f) == 0) { - *(data++) = (static_cast<char>(val.getLowBits())); - break; - } else { - *(data++) = static_cast<char>(0x80 | (val.getLowBits() & 0x7f)); - val >>= 7; - } - } - valueStream->write(buffer, static_cast<size_t>(data - buffer)); - - ++count; - if (enableBloomFilter) { - std::string decimal = Decimal( - values[i], static_cast<int32_t>(scale)).toString(); - bloomFilter->addBytes( - decimal.c_str(), static_cast<int64_t>(decimal.size())); - } - decStats->update(Decimal(values[i], static_cast<int32_t>(scale))); - } - } - decStats->increase(count); - if (count < numValues) { - decStats->setHasNull(true); - } - std::vector<int64_t> scales(numValues, static_cast<int64_t>(scale)); - scaleEncoder->add(scales.data(), numValues, notNull); - } - - class ListColumnWriter : public ColumnWriter { - public: - ListColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - ~ListColumnWriter() override; - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void mergeStripeStatsIntoFileStats() override; - - virtual void mergeRowGroupStatsIntoStripeStats() override; - - virtual void createRowIndexEntry() override; - - virtual void writeIndex( - std::vector<proto::Stream> &streams) const override; - - virtual void recordPosition() const override; - - virtual void writeDictionary() override; - - virtual void reset() override; - - private: - std::unique_ptr<RleEncoder> lengthEncoder; - RleVersion rleVersion; - std::unique_ptr<ColumnWriter> child; - }; - - ListColumnWriter::ListColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()){ - - 
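    // A LIST column is serialized as a LENGTH stream carrying the per-row
    // element counts (add() converts the batch offsets into lengths before
    // handing them to lengthEncoder) plus the flattened element column,
    // which is delegated to the single child writer.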
std::unique_ptr<BufferedOutputStream> lengthStream = - factory.createStream(proto::Stream_Kind_LENGTH); - lengthEncoder = createRleEncoder(std::move(lengthStream), - false, - rleVersion, - memPool, - options.getAlignedBitpacking()); - - if (type.getSubtypeCount() == 1) { - child = buildWriter(*type.getSubtype(0), factory, options); - } - - if (enableIndex) { - recordPosition(); - } - } - - ListColumnWriter::~ListColumnWriter() { - // PASS - } - - void ListColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - ListVectorBatch* listBatch = dynamic_cast<ListVectorBatch*>(&rowBatch); - if (listBatch == nullptr) { - throw InvalidArgument("Failed to cast to ListVectorBatch"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - int64_t* offsets = listBatch->offsets.data() + offset; - const char* notNull = listBatch->hasNulls ? - listBatch->notNull.data() + offset : nullptr; - - uint64_t elemOffset = static_cast<uint64_t>(offsets[0]); - uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]); - - // translate offsets to lengths - for (uint64_t i = 0; i != numValues; ++i) { - offsets[i] = offsets[i + 1] - offsets[i]; - } - - // unnecessary to deal with null as elements are packed together - if (child.get()) { - child->add(*listBatch->elements, elemOffset, totalNumValues, nullptr); - } - lengthEncoder->add(offsets, numValues, notNull); - - if (enableIndex) { - if (!notNull) { - colIndexStatistics->increase(numValues); - } else { - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(offsets[i]); - } - } - } - colIndexStatistics->increase(count); - if (count < numValues) { - colIndexStatistics->setHasNull(true); - } - } - } - } - - void ListColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_LENGTH); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(lengthEncoder->flush()); - streams.push_back(stream); - - if (child.get()) { - child->flush(streams); - } - } - - void ListColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { - ColumnWriter::writeIndex(streams); - if (child.get()) { - child->writeIndex(streams); - } - } - - uint64_t ListColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - if (child.get()) { - size += lengthEncoder->getBufferSize(); - size += child->getEstimatedSize(); - } - return size; - } - - void ListColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - if (child.get()) { - child->getColumnEncoding(encodings); - } - } - - void ListColumnWriter::getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getStripeStatistics(stats); - if (child.get()) { - child->getStripeStatistics(stats); - } - } - - void ListColumnWriter::mergeStripeStatsIntoFileStats() { - ColumnWriter::mergeStripeStatsIntoFileStats(); - if (child.get()) { - child->mergeStripeStatsIntoFileStats(); - } - } - - void ListColumnWriter::getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - 
ColumnWriter::getFileStatistics(stats); - if (child.get()) { - child->getFileStatistics(stats); - } - } - - void ListColumnWriter::mergeRowGroupStatsIntoStripeStats() { - ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - if (child.get()) { - child->mergeRowGroupStatsIntoStripeStats(); - } - } - - void ListColumnWriter::createRowIndexEntry() { - ColumnWriter::createRowIndexEntry(); - if (child.get()) { - child->createRowIndexEntry(); - } - } - - void ListColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - lengthEncoder->recordPosition(rowIndexPosition.get()); - } - - void ListColumnWriter::reset() { - ColumnWriter::reset(); - if (child) { - child->reset(); - } - } - - void ListColumnWriter::writeDictionary() { - if (child) { - child->writeDictionary(); - } - } - - class MapColumnWriter : public ColumnWriter { - public: - MapColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - ~MapColumnWriter() override; - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void mergeStripeStatsIntoFileStats() override; - - virtual void mergeRowGroupStatsIntoStripeStats() override; - - virtual void createRowIndexEntry() override; - - virtual void writeIndex( - std::vector<proto::Stream> &streams) const override; - - virtual void recordPosition() const override; - - virtual void writeDictionary() override; - - virtual void reset() override; - - private: - std::unique_ptr<ColumnWriter> keyWriter; - std::unique_ptr<ColumnWriter> elemWriter; - std::unique_ptr<RleEncoder> lengthEncoder; - RleVersion rleVersion; - }; - - MapColumnWriter::MapColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()){ - std::unique_ptr<BufferedOutputStream> lengthStream = - factory.createStream(proto::Stream_Kind_LENGTH); - lengthEncoder = createRleEncoder(std::move(lengthStream), - false, - rleVersion, - memPool, - options.getAlignedBitpacking()); - - if (type.getSubtypeCount() > 0) { - keyWriter = buildWriter(*type.getSubtype(0), factory, options); - } - - if (type.getSubtypeCount() > 1) { - elemWriter = buildWriter(*type.getSubtype(1), factory, options); - } - - if (enableIndex) { - recordPosition(); - } - } - - MapColumnWriter::~MapColumnWriter() { - // PASS - } - - void MapColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - MapVectorBatch* mapBatch = dynamic_cast<MapVectorBatch*>(&rowBatch); - if (mapBatch == nullptr) { - throw InvalidArgument("Failed to cast to MapVectorBatch"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - int64_t* offsets = mapBatch->offsets.data() + offset; - const char* notNull = mapBatch->hasNulls ? 
- mapBatch->notNull.data() + offset : nullptr; - - uint64_t elemOffset = static_cast<uint64_t>(offsets[0]); - uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]); - - // translate offsets to lengths - for (uint64_t i = 0; i != numValues; ++i) { - offsets[i] = offsets[i + 1] - offsets[i]; - } - - lengthEncoder->add(offsets, numValues, notNull); - - // unnecessary to deal with null as keys and values are packed together - if (keyWriter.get()) { - keyWriter->add(*mapBatch->keys, elemOffset, totalNumValues, nullptr); - } - if (elemWriter.get()) { - elemWriter->add(*mapBatch->elements, elemOffset, totalNumValues, nullptr); - } - - if (enableIndex) { - if (!notNull) { - colIndexStatistics->increase(numValues); - } else { - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(offsets[i]); - } - } - } - colIndexStatistics->increase(count); - if (count < numValues) { - colIndexStatistics->setHasNull(true); - } - } - } - } - - void MapColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_LENGTH); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(lengthEncoder->flush()); - streams.push_back(stream); - - if (keyWriter.get()) { - keyWriter->flush(streams); - } - if (elemWriter.get()) { - elemWriter->flush(streams); - } - } - - void MapColumnWriter::writeIndex( - std::vector<proto::Stream> &streams) const { - ColumnWriter::writeIndex(streams); - if (keyWriter.get()) { - keyWriter->writeIndex(streams); - } - if (elemWriter.get()) { - elemWriter->writeIndex(streams); - } - } - - uint64_t MapColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += lengthEncoder->getBufferSize(); - if (keyWriter.get()) { - size += keyWriter->getEstimatedSize(); - } - if (elemWriter.get()) { - size += elemWriter->getEstimatedSize(); - } - return size; - } - - void MapColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - if (keyWriter.get()) { - keyWriter->getColumnEncoding(encodings); - } - if (elemWriter.get()) { - elemWriter->getColumnEncoding(encodings); - } - } - - void MapColumnWriter::getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getStripeStatistics(stats); - if (keyWriter.get()) { - keyWriter->getStripeStatistics(stats); - } - if (elemWriter.get()) { - elemWriter->getStripeStatistics(stats); - } - } - - void MapColumnWriter::mergeStripeStatsIntoFileStats() { - ColumnWriter::mergeStripeStatsIntoFileStats(); - if (keyWriter.get()) { - keyWriter->mergeStripeStatsIntoFileStats(); - } - if (elemWriter.get()) { - elemWriter->mergeStripeStatsIntoFileStats(); - } - } - - void MapColumnWriter::getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getFileStatistics(stats); - if (keyWriter.get()) { - keyWriter->getFileStatistics(stats); - } - if (elemWriter.get()) { - elemWriter->getFileStatistics(stats); - } - } - - void MapColumnWriter::mergeRowGroupStatsIntoStripeStats() { - ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - if (keyWriter.get()) { - 
keyWriter->mergeRowGroupStatsIntoStripeStats(); - } - if (elemWriter.get()) { - elemWriter->mergeRowGroupStatsIntoStripeStats(); - } - } - - void MapColumnWriter::createRowIndexEntry() { - ColumnWriter::createRowIndexEntry(); - if (keyWriter.get()) { - keyWriter->createRowIndexEntry(); - } - if (elemWriter.get()) { - elemWriter->createRowIndexEntry(); - } - } - - void MapColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - lengthEncoder->recordPosition(rowIndexPosition.get()); - } - - void MapColumnWriter::reset() { - ColumnWriter::reset(); - if (keyWriter) { - keyWriter->reset(); - } - if (elemWriter) { - elemWriter->reset(); - } - } - - void MapColumnWriter::writeDictionary() { - if (keyWriter) { - keyWriter->writeDictionary(); - } - if (elemWriter) { - elemWriter->writeDictionary(); - } - } - - class UnionColumnWriter : public ColumnWriter { - public: - UnionColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); - ~UnionColumnWriter() override; - - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) override; - - virtual void flush(std::vector<proto::Stream>& streams) override; - - virtual uint64_t getEstimatedSize() const override; - - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const override; - - virtual void getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const override; - - virtual void mergeStripeStatsIntoFileStats() override; - - virtual void mergeRowGroupStatsIntoStripeStats() override; - - virtual void createRowIndexEntry() override; - - virtual void writeIndex( - std::vector<proto::Stream> &streams) const override; - - virtual void recordPosition() const override; - - virtual void writeDictionary() override; - - virtual void reset() override; - - private: - std::unique_ptr<ByteRleEncoder> rleEncoder; - std::vector<ColumnWriter*> children; - }; - - UnionColumnWriter::UnionColumnWriter(const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) : - ColumnWriter(type, factory, options) { - - std::unique_ptr<BufferedOutputStream> dataStream = - factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createByteRleEncoder(std::move(dataStream)); - - for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) { - children.push_back(buildWriter(*type.getSubtype(i), - factory, - options).release()); - } - - if (enableIndex) { - recordPosition(); - } - } - - UnionColumnWriter::~UnionColumnWriter() { - for (uint32_t i = 0; i < children.size(); ++i) { - delete children[i]; - } - } - - void UnionColumnWriter::add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char* incomingMask) { - UnionVectorBatch* unionBatch = dynamic_cast<UnionVectorBatch*>(&rowBatch); - if (unionBatch == nullptr) { - throw InvalidArgument("Failed to cast to UnionVectorBatch"); - } - - ColumnWriter::add(rowBatch, offset, numValues, incomingMask); - - const char* notNull = unionBatch->hasNulls ? 
- unionBatch->notNull.data() + offset : nullptr; - unsigned char * tags = unionBatch->tags.data() + offset; - uint64_t * offsets = unionBatch->offsets.data() + offset; - - std::vector<int64_t> childOffset(children.size(), -1); - std::vector<uint64_t> childLength(children.size(), 0); - - for (uint64_t i = 0; i != numValues; ++i) { - if (childOffset[tags[i]] == -1) { - childOffset[tags[i]] = static_cast<int64_t>(offsets[i]); - } - ++childLength[tags[i]]; - } - - rleEncoder->add(reinterpret_cast<char*>(tags), numValues, notNull); - - for (uint32_t i = 0; i < children.size(); ++i) { - if (childLength[i] > 0) { - children[i]->add(*unionBatch->children[i], - static_cast<uint64_t>(childOffset[i]), - childLength[i], nullptr); - } - } - - // update stats - if (enableIndex) { - if (!notNull) { - colIndexStatistics->increase(numValues); - } else { - uint64_t count = 0; - for (uint64_t i = 0; i < numValues; ++i) { - if (notNull[i]) { - ++count; - if (enableBloomFilter) { - bloomFilter->addLong(tags[i]); - } - } - } - colIndexStatistics->increase(count); - if (count < numValues) { - colIndexStatistics->setHasNull(true); - } - } - } - } - - void UnionColumnWriter::flush(std::vector<proto::Stream>& streams) { - ColumnWriter::flush(streams); - - proto::Stream stream; - stream.set_kind(proto::Stream_Kind_DATA); - stream.set_column(static_cast<uint32_t>(columnId)); - stream.set_length(rleEncoder->flush()); - streams.push_back(stream); - - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->flush(streams); - } - } - - void UnionColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { - ColumnWriter::writeIndex(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeIndex(streams); - } - } - - uint64_t UnionColumnWriter::getEstimatedSize() const { - uint64_t size = ColumnWriter::getEstimatedSize(); - size += rleEncoder->getBufferSize(); - for (uint32_t i = 0; i < children.size(); ++i) { - size += children[i]->getEstimatedSize(); - } - return size; - } - - void UnionColumnWriter::getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const { - proto::ColumnEncoding encoding; - encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); - if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); - } - encodings.push_back(encoding); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getColumnEncoding(encodings); - } - } - - void UnionColumnWriter::getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getStripeStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getStripeStatistics(stats); - } - } - - void UnionColumnWriter::mergeStripeStatsIntoFileStats() { - ColumnWriter::mergeStripeStatsIntoFileStats(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeStripeStatsIntoFileStats(); - } - } - - void UnionColumnWriter::getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const { - ColumnWriter::getFileStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getFileStatistics(stats); - } - } - - void UnionColumnWriter::mergeRowGroupStatsIntoStripeStats() { - ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeRowGroupStatsIntoStripeStats(); - } - } - - void UnionColumnWriter::createRowIndexEntry() { - ColumnWriter::createRowIndexEntry(); - for (uint32_t i = 0; i < children.size(); ++i) { - 
children[i]->createRowIndexEntry(); - } - } - - void UnionColumnWriter::recordPosition() const { - ColumnWriter::recordPosition(); - rleEncoder->recordPosition(rowIndexPosition.get()); - } - - void UnionColumnWriter::reset() { - ColumnWriter::reset(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->reset(); - } - } - - void UnionColumnWriter::writeDictionary() { - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeDictionary(); - } - } - - std::unique_ptr<ColumnWriter> buildWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options) { - switch (static_cast<int64_t>(type.getKind())) { - case STRUCT: - return std::unique_ptr<ColumnWriter>( - new StructColumnWriter( - type, - factory, - options)); - case INT: - case LONG: - case SHORT: - return std::unique_ptr<ColumnWriter>( - new IntegerColumnWriter( - type, - factory, - options)); - case BYTE: - return std::unique_ptr<ColumnWriter>( - new ByteColumnWriter( - type, - factory, - options)); - case BOOLEAN: - return std::unique_ptr<ColumnWriter>( - new BooleanColumnWriter( - type, - factory, - options)); - case DOUBLE: - return std::unique_ptr<ColumnWriter>( - new DoubleColumnWriter( - type, - factory, - options, - false)); - case FLOAT: - return std::unique_ptr<ColumnWriter>( - new DoubleColumnWriter( - type, - factory, - options, - true)); - case BINARY: - return std::unique_ptr<ColumnWriter>( - new BinaryColumnWriter( - type, - factory, - options)); - case STRING: - return std::unique_ptr<ColumnWriter>( - new StringColumnWriter( - type, - factory, - options)); - case CHAR: - return std::unique_ptr<ColumnWriter>( - new CharColumnWriter( - type, - factory, - options)); - case VARCHAR: - return std::unique_ptr<ColumnWriter>( - new VarCharColumnWriter( - type, - factory, - options)); - case DATE: - return std::unique_ptr<ColumnWriter>( - new DateColumnWriter( - type, - factory, - options)); - case TIMESTAMP: - return std::unique_ptr<ColumnWriter>( - new TimestampColumnWriter( - type, - factory, - options)); - case DECIMAL: - if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_64) { - return std::unique_ptr<ColumnWriter>( - new Decimal64ColumnWriter( - type, - factory, - options)); - } else if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_128) { - return std::unique_ptr<ColumnWriter>( - new Decimal128ColumnWriter( - type, - factory, - options)); - } else { - throw NotImplementedYet("Decimal precision more than 38 is not " - "supported"); - } - case LIST: - return std::unique_ptr<ColumnWriter>( - new ListColumnWriter( - type, - factory, - options)); - case MAP: - return std::unique_ptr<ColumnWriter>( - new MapColumnWriter( - type, - factory, - options)); - case UNION: - return std::unique_ptr<ColumnWriter>( - new UnionColumnWriter( - type, - factory, - options)); - default: - throw NotImplementedYet("Type is not supported yet for creating " - "ColumnWriter."); - } - } -} + secs[i] += 1; + } + + secs[i] -= timezone.getEpoch(); + nanos[i] = formatNano(nanos[i]); + } + } + tsStats->increase(count); + if (count < numValues) { + tsStats->setHasNull(true); + } + + secRleEncoder->add(secs, numValues, notNull); + nanoRleEncoder->add(nanos, numValues, notNull); + } + + void TimestampColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream dataStream; + dataStream.set_kind(proto::Stream_Kind_DATA); + dataStream.set_column(static_cast<uint32_t>(columnId)); + dataStream.set_length(secRleEncoder->flush()); + 
streams.push_back(dataStream); + + proto::Stream secondaryStream; + secondaryStream.set_kind(proto::Stream_Kind_SECONDARY); + secondaryStream.set_column(static_cast<uint32_t>(columnId)); + secondaryStream.set_length(nanoRleEncoder->flush()); + streams.push_back(secondaryStream); + } + + uint64_t TimestampColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += secRleEncoder->getBufferSize(); + size += nanoRleEncoder->getBufferSize(); + return size; + } + + void TimestampColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void TimestampColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + secRleEncoder->recordPosition(rowIndexPosition.get()); + nanoRleEncoder->recordPosition(rowIndexPosition.get()); + } + + class DateColumnWriter : public IntegerColumnWriter { + public: + DateColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + }; + + DateColumnWriter::DateColumnWriter( + const Type &type, + const StreamsFactory &factory, + const WriterOptions &options) : + IntegerColumnWriter(type, factory, options) { + // PASS + } + + void DateColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const LongVectorBatch* longBatch = + dynamic_cast<const LongVectorBatch*>(&rowBatch); + if (longBatch == nullptr) { + throw InvalidArgument("Failed to cast to LongVectorBatch"); + } + + DateColumnStatisticsImpl* dateStats = + dynamic_cast<DateColumnStatisticsImpl*>(colIndexStatistics.get()); + if (dateStats == nullptr) { + throw InvalidArgument("Failed to cast to DateColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const int64_t* data = longBatch->data.data() + offset; + const char* notNull = longBatch->hasNulls ? 
+ longBatch->notNull.data() + offset : nullptr; + + rleEncoder->add(data, numValues, notNull); + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + ++count; + dateStats->update(static_cast<int32_t>(data[i])); + if (enableBloomFilter) { + bloomFilter->addLong(data[i]); + } + } + } + dateStats->increase(count); + if (count < numValues) { + dateStats->setHasNull(true); + } + } + + class Decimal64ColumnWriter : public ColumnWriter { + public: + static const uint32_t MAX_PRECISION_64 = 18; + static const uint32_t MAX_PRECISION_128 = 38; + + Decimal64ColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void recordPosition() const override; + + protected: + RleVersion rleVersion; + uint64_t precision; + uint64_t scale; + std::unique_ptr<AppendOnlyBufferedStream> valueStream; + std::unique_ptr<RleEncoder> scaleEncoder; + + private: + char buffer[10]; + }; + + Decimal64ColumnWriter::Decimal64ColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()), + precision(type.getPrecision()), + scale(type.getScale()) { + valueStream.reset(new AppendOnlyBufferedStream( + factory.createStream(proto::Stream_Kind_DATA))); + std::unique_ptr<BufferedOutputStream> scaleStream = + factory.createStream(proto::Stream_Kind_SECONDARY); + scaleEncoder = createRleEncoder(std::move(scaleStream), + true, + rleVersion, + memPool, + options.getAlignedBitpacking()); + + if (enableIndex) { + recordPosition(); + } + } + + void Decimal64ColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const Decimal64VectorBatch* decBatch = + dynamic_cast<const Decimal64VectorBatch*>(&rowBatch); + if (decBatch == nullptr) { + throw InvalidArgument("Failed to cast to Decimal64VectorBatch"); + } + + DecimalColumnStatisticsImpl* decStats = + dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get()); + if (decStats == nullptr) { + throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const char* notNull = decBatch->hasNulls ? 
+ decBatch->notNull.data() + offset : nullptr; + const int64_t* values = decBatch->values.data() + offset; + + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + int64_t val = zigZag(values[i]); + char* data = buffer; + while (true) { + if ((val & ~0x7f) == 0) { + *(data++) = (static_cast<char>(val)); + break; + } else { + *(data++) = static_cast<char>(0x80 | (val & 0x7f)); + // cast val to unsigned so as to force 0-fill right shift + val = (static_cast<uint64_t>(val) >> 7); + } + } + valueStream->write(buffer, static_cast<size_t>(data - buffer)); + ++count; + if (enableBloomFilter) { + std::string decimal = Decimal( + values[i], static_cast<int32_t>(scale)).toString(); + bloomFilter->addBytes( + decimal.c_str(), static_cast<int64_t>(decimal.size())); + } + decStats->update(Decimal(values[i], static_cast<int32_t>(scale))); + } + } + decStats->increase(count); + if (count < numValues) { + decStats->setHasNull(true); + } + std::vector<int64_t> scales(numValues, static_cast<int64_t>(scale)); + scaleEncoder->add(scales.data(), numValues, notNull); + } + + void Decimal64ColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream dataStream; + dataStream.set_kind(proto::Stream_Kind_DATA); + dataStream.set_column(static_cast<uint32_t>(columnId)); + dataStream.set_length(valueStream->flush()); + streams.push_back(dataStream); + + proto::Stream secondaryStream; + secondaryStream.set_kind(proto::Stream_Kind_SECONDARY); + secondaryStream.set_column(static_cast<uint32_t>(columnId)); + secondaryStream.set_length(scaleEncoder->flush()); + streams.push_back(secondaryStream); + } + + uint64_t Decimal64ColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += valueStream->getSize(); + size += scaleEncoder->getBufferSize(); + return size; + } + + void Decimal64ColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + } + + void Decimal64ColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + valueStream->recordPosition(rowIndexPosition.get()); + scaleEncoder->recordPosition(rowIndexPosition.get()); + } + + class Decimal128ColumnWriter : public Decimal64ColumnWriter { + public: + Decimal128ColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + private: + char buffer[20]; + }; + + Decimal128ColumnWriter::Decimal128ColumnWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + Decimal64ColumnWriter(type, factory, options) { + // PASS + } + + // Zigzag encoding moves the sign bit to the least significant bit using the + // expression (val « 1) ^ (val » 63) and derives its name from the fact that + // positive and negative numbers alternate once encoded. 
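For reference, the transform described in the comment above, (val << 1) ^ (val >> 63), interleaves negative and non-negative values so that numbers of small magnitude stay small before the base-128 varint loop writes them out seven bits at a time. The short standalone sketch below mirrors the encoding loop in Decimal64ColumnWriter::add; the helper names zigZag64 and encodeZigZagVarint are illustrative only and are not part of the ORC sources, and the sketch assumes the usual arithmetic right shift for signed values.

#include <cstdint>
#include <vector>

// Zigzag: 0, -1, 1, -2, 2, ... map to 0, 1, 2, 3, 4, ...
// so values of small magnitude need few varint bytes.
inline uint64_t zigZag64(int64_t val) {
  return (static_cast<uint64_t>(val) << 1) ^ static_cast<uint64_t>(val >> 63);
}

// Base-128 varint: emit 7 bits per byte, setting the high bit on every byte
// except the last, matching the while (true) loop in the writer above.
inline std::vector<uint8_t> encodeZigZagVarint(int64_t value) {
  uint64_t val = zigZag64(value);
  std::vector<uint8_t> out;
  while (val & ~UINT64_C(0x7f)) {
    out.push_back(static_cast<uint8_t>(0x80 | (val & 0x7f)));
    val >>= 7;  // unsigned shift, so the vacated bits fill with zero
  }
  out.push_back(static_cast<uint8_t>(val));
  return out;
}

// Example: -1 zigzags to 1 and encodes as the single byte 0x01;
// 300 zigzags to 600 and encodes as the two bytes 0xD8 0x04.

Decimal128ColumnWriter applies the same scheme to 128-bit values through zigZagInt128 and Int128 shifts, as the code that follows shows.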
+ Int128 zigZagInt128(const Int128& value) { + bool isNegative = value < 0; + Int128 val = value.abs(); + val <<= 1; + if (isNegative) { + val -= 1; + } + return val; + } + + void Decimal128ColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + const Decimal128VectorBatch* decBatch = + dynamic_cast<const Decimal128VectorBatch*>(&rowBatch); + if (decBatch == nullptr) { + throw InvalidArgument("Failed to cast to Decimal128VectorBatch"); + } + + DecimalColumnStatisticsImpl* decStats = + dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get()); + if (decStats == nullptr) { + throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const char* notNull = decBatch->hasNulls ? + decBatch->notNull.data() + offset : nullptr; + const Int128* values = decBatch->values.data() + offset; + + // The current encoding of decimal columns stores the integer representation + // of the value as an unbounded length zigzag encoded base 128 varint. + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + Int128 val = zigZagInt128(values[i]); + char* data = buffer; + while (true) { + if ((val & ~0x7f) == 0) { + *(data++) = (static_cast<char>(val.getLowBits())); + break; + } else { + *(data++) = static_cast<char>(0x80 | (val.getLowBits() & 0x7f)); + val >>= 7; + } + } + valueStream->write(buffer, static_cast<size_t>(data - buffer)); + + ++count; + if (enableBloomFilter) { + std::string decimal = Decimal( + values[i], static_cast<int32_t>(scale)).toString(); + bloomFilter->addBytes( + decimal.c_str(), static_cast<int64_t>(decimal.size())); + } + decStats->update(Decimal(values[i], static_cast<int32_t>(scale))); + } + } + decStats->increase(count); + if (count < numValues) { + decStats->setHasNull(true); + } + std::vector<int64_t> scales(numValues, static_cast<int64_t>(scale)); + scaleEncoder->add(scales.data(), numValues, notNull); + } + + class ListColumnWriter : public ColumnWriter { + public: + ListColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + ~ListColumnWriter() override; + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void mergeStripeStatsIntoFileStats() override; + + virtual void mergeRowGroupStatsIntoStripeStats() override; + + virtual void createRowIndexEntry() override; + + virtual void writeIndex( + std::vector<proto::Stream> &streams) const override; + + virtual void recordPosition() const override; + + virtual void writeDictionary() override; + + virtual void reset() override; + + private: + std::unique_ptr<RleEncoder> lengthEncoder; + RleVersion rleVersion; + std::unique_ptr<ColumnWriter> child; + }; + + ListColumnWriter::ListColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()){ + + 
std::unique_ptr<BufferedOutputStream> lengthStream = + factory.createStream(proto::Stream_Kind_LENGTH); + lengthEncoder = createRleEncoder(std::move(lengthStream), + false, + rleVersion, + memPool, + options.getAlignedBitpacking()); + + if (type.getSubtypeCount() == 1) { + child = buildWriter(*type.getSubtype(0), factory, options); + } + + if (enableIndex) { + recordPosition(); + } + } + + ListColumnWriter::~ListColumnWriter() { + // PASS + } + + void ListColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + ListVectorBatch* listBatch = dynamic_cast<ListVectorBatch*>(&rowBatch); + if (listBatch == nullptr) { + throw InvalidArgument("Failed to cast to ListVectorBatch"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + int64_t* offsets = listBatch->offsets.data() + offset; + const char* notNull = listBatch->hasNulls ? + listBatch->notNull.data() + offset : nullptr; + + uint64_t elemOffset = static_cast<uint64_t>(offsets[0]); + uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]); + + // translate offsets to lengths + for (uint64_t i = 0; i != numValues; ++i) { + offsets[i] = offsets[i + 1] - offsets[i]; + } + + // unnecessary to deal with null as elements are packed together + if (child.get()) { + child->add(*listBatch->elements, elemOffset, totalNumValues, nullptr); + } + lengthEncoder->add(offsets, numValues, notNull); + + if (enableIndex) { + if (!notNull) { + colIndexStatistics->increase(numValues); + } else { + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(offsets[i]); + } + } + } + colIndexStatistics->increase(count); + if (count < numValues) { + colIndexStatistics->setHasNull(true); + } + } + } + } + + void ListColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_LENGTH); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(lengthEncoder->flush()); + streams.push_back(stream); + + if (child.get()) { + child->flush(streams); + } + } + + void ListColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { + ColumnWriter::writeIndex(streams); + if (child.get()) { + child->writeIndex(streams); + } + } + + uint64_t ListColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + if (child.get()) { + size += lengthEncoder->getBufferSize(); + size += child->getEstimatedSize(); + } + return size; + } + + void ListColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + if (child.get()) { + child->getColumnEncoding(encodings); + } + } + + void ListColumnWriter::getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getStripeStatistics(stats); + if (child.get()) { + child->getStripeStatistics(stats); + } + } + + void ListColumnWriter::mergeStripeStatsIntoFileStats() { + ColumnWriter::mergeStripeStatsIntoFileStats(); + if (child.get()) { + child->mergeStripeStatsIntoFileStats(); + } + } + + void ListColumnWriter::getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + 
ColumnWriter::getFileStatistics(stats); + if (child.get()) { + child->getFileStatistics(stats); + } + } + + void ListColumnWriter::mergeRowGroupStatsIntoStripeStats() { + ColumnWriter::mergeRowGroupStatsIntoStripeStats(); + if (child.get()) { + child->mergeRowGroupStatsIntoStripeStats(); + } + } + + void ListColumnWriter::createRowIndexEntry() { + ColumnWriter::createRowIndexEntry(); + if (child.get()) { + child->createRowIndexEntry(); + } + } + + void ListColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + lengthEncoder->recordPosition(rowIndexPosition.get()); + } + + void ListColumnWriter::reset() { + ColumnWriter::reset(); + if (child) { + child->reset(); + } + } + + void ListColumnWriter::writeDictionary() { + if (child) { + child->writeDictionary(); + } + } + + class MapColumnWriter : public ColumnWriter { + public: + MapColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + ~MapColumnWriter() override; + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void mergeStripeStatsIntoFileStats() override; + + virtual void mergeRowGroupStatsIntoStripeStats() override; + + virtual void createRowIndexEntry() override; + + virtual void writeIndex( + std::vector<proto::Stream> &streams) const override; + + virtual void recordPosition() const override; + + virtual void writeDictionary() override; + + virtual void reset() override; + + private: + std::unique_ptr<ColumnWriter> keyWriter; + std::unique_ptr<ColumnWriter> elemWriter; + std::unique_ptr<RleEncoder> lengthEncoder; + RleVersion rleVersion; + }; + + MapColumnWriter::MapColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options), + rleVersion(options.getRleVersion()){ + std::unique_ptr<BufferedOutputStream> lengthStream = + factory.createStream(proto::Stream_Kind_LENGTH); + lengthEncoder = createRleEncoder(std::move(lengthStream), + false, + rleVersion, + memPool, + options.getAlignedBitpacking()); + + if (type.getSubtypeCount() > 0) { + keyWriter = buildWriter(*type.getSubtype(0), factory, options); + } + + if (type.getSubtypeCount() > 1) { + elemWriter = buildWriter(*type.getSubtype(1), factory, options); + } + + if (enableIndex) { + recordPosition(); + } + } + + MapColumnWriter::~MapColumnWriter() { + // PASS + } + + void MapColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + MapVectorBatch* mapBatch = dynamic_cast<MapVectorBatch*>(&rowBatch); + if (mapBatch == nullptr) { + throw InvalidArgument("Failed to cast to MapVectorBatch"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + int64_t* offsets = mapBatch->offsets.data() + offset; + const char* notNull = mapBatch->hasNulls ? 
+ mapBatch->notNull.data() + offset : nullptr; + + uint64_t elemOffset = static_cast<uint64_t>(offsets[0]); + uint64_t totalNumValues = static_cast<uint64_t>(offsets[numValues] - offsets[0]); + + // translate offsets to lengths + for (uint64_t i = 0; i != numValues; ++i) { + offsets[i] = offsets[i + 1] - offsets[i]; + } + + lengthEncoder->add(offsets, numValues, notNull); + + // unnecessary to deal with null as keys and values are packed together + if (keyWriter.get()) { + keyWriter->add(*mapBatch->keys, elemOffset, totalNumValues, nullptr); + } + if (elemWriter.get()) { + elemWriter->add(*mapBatch->elements, elemOffset, totalNumValues, nullptr); + } + + if (enableIndex) { + if (!notNull) { + colIndexStatistics->increase(numValues); + } else { + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(offsets[i]); + } + } + } + colIndexStatistics->increase(count); + if (count < numValues) { + colIndexStatistics->setHasNull(true); + } + } + } + } + + void MapColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_LENGTH); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(lengthEncoder->flush()); + streams.push_back(stream); + + if (keyWriter.get()) { + keyWriter->flush(streams); + } + if (elemWriter.get()) { + elemWriter->flush(streams); + } + } + + void MapColumnWriter::writeIndex( + std::vector<proto::Stream> &streams) const { + ColumnWriter::writeIndex(streams); + if (keyWriter.get()) { + keyWriter->writeIndex(streams); + } + if (elemWriter.get()) { + elemWriter->writeIndex(streams); + } + } + + uint64_t MapColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += lengthEncoder->getBufferSize(); + if (keyWriter.get()) { + size += keyWriter->getEstimatedSize(); + } + if (elemWriter.get()) { + size += elemWriter->getEstimatedSize(); + } + return size; + } + + void MapColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + if (keyWriter.get()) { + keyWriter->getColumnEncoding(encodings); + } + if (elemWriter.get()) { + elemWriter->getColumnEncoding(encodings); + } + } + + void MapColumnWriter::getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getStripeStatistics(stats); + if (keyWriter.get()) { + keyWriter->getStripeStatistics(stats); + } + if (elemWriter.get()) { + elemWriter->getStripeStatistics(stats); + } + } + + void MapColumnWriter::mergeStripeStatsIntoFileStats() { + ColumnWriter::mergeStripeStatsIntoFileStats(); + if (keyWriter.get()) { + keyWriter->mergeStripeStatsIntoFileStats(); + } + if (elemWriter.get()) { + elemWriter->mergeStripeStatsIntoFileStats(); + } + } + + void MapColumnWriter::getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getFileStatistics(stats); + if (keyWriter.get()) { + keyWriter->getFileStatistics(stats); + } + if (elemWriter.get()) { + elemWriter->getFileStatistics(stats); + } + } + + void MapColumnWriter::mergeRowGroupStatsIntoStripeStats() { + ColumnWriter::mergeRowGroupStatsIntoStripeStats(); + if (keyWriter.get()) { + 
keyWriter->mergeRowGroupStatsIntoStripeStats(); + } + if (elemWriter.get()) { + elemWriter->mergeRowGroupStatsIntoStripeStats(); + } + } + + void MapColumnWriter::createRowIndexEntry() { + ColumnWriter::createRowIndexEntry(); + if (keyWriter.get()) { + keyWriter->createRowIndexEntry(); + } + if (elemWriter.get()) { + elemWriter->createRowIndexEntry(); + } + } + + void MapColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + lengthEncoder->recordPosition(rowIndexPosition.get()); + } + + void MapColumnWriter::reset() { + ColumnWriter::reset(); + if (keyWriter) { + keyWriter->reset(); + } + if (elemWriter) { + elemWriter->reset(); + } + } + + void MapColumnWriter::writeDictionary() { + if (keyWriter) { + keyWriter->writeDictionary(); + } + if (elemWriter) { + elemWriter->writeDictionary(); + } + } + + class UnionColumnWriter : public ColumnWriter { + public: + UnionColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); + ~UnionColumnWriter() override; + + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) override; + + virtual void flush(std::vector<proto::Stream>& streams) override; + + virtual uint64_t getEstimatedSize() const override; + + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const override; + + virtual void getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const override; + + virtual void mergeStripeStatsIntoFileStats() override; + + virtual void mergeRowGroupStatsIntoStripeStats() override; + + virtual void createRowIndexEntry() override; + + virtual void writeIndex( + std::vector<proto::Stream> &streams) const override; + + virtual void recordPosition() const override; + + virtual void writeDictionary() override; + + virtual void reset() override; + + private: + std::unique_ptr<ByteRleEncoder> rleEncoder; + std::vector<ColumnWriter*> children; + }; + + UnionColumnWriter::UnionColumnWriter(const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) : + ColumnWriter(type, factory, options) { + + std::unique_ptr<BufferedOutputStream> dataStream = + factory.createStream(proto::Stream_Kind_DATA); + rleEncoder = createByteRleEncoder(std::move(dataStream)); + + for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) { + children.push_back(buildWriter(*type.getSubtype(i), + factory, + options).release()); + } + + if (enableIndex) { + recordPosition(); + } + } + + UnionColumnWriter::~UnionColumnWriter() { + for (uint32_t i = 0; i < children.size(); ++i) { + delete children[i]; + } + } + + void UnionColumnWriter::add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char* incomingMask) { + UnionVectorBatch* unionBatch = dynamic_cast<UnionVectorBatch*>(&rowBatch); + if (unionBatch == nullptr) { + throw InvalidArgument("Failed to cast to UnionVectorBatch"); + } + + ColumnWriter::add(rowBatch, offset, numValues, incomingMask); + + const char* notNull = unionBatch->hasNulls ? 
+ unionBatch->notNull.data() + offset : nullptr; + unsigned char * tags = unionBatch->tags.data() + offset; + uint64_t * offsets = unionBatch->offsets.data() + offset; + + std::vector<int64_t> childOffset(children.size(), -1); + std::vector<uint64_t> childLength(children.size(), 0); + + for (uint64_t i = 0; i != numValues; ++i) { + if (childOffset[tags[i]] == -1) { + childOffset[tags[i]] = static_cast<int64_t>(offsets[i]); + } + ++childLength[tags[i]]; + } + + rleEncoder->add(reinterpret_cast<char*>(tags), numValues, notNull); + + for (uint32_t i = 0; i < children.size(); ++i) { + if (childLength[i] > 0) { + children[i]->add(*unionBatch->children[i], + static_cast<uint64_t>(childOffset[i]), + childLength[i], nullptr); + } + } + + // update stats + if (enableIndex) { + if (!notNull) { + colIndexStatistics->increase(numValues); + } else { + uint64_t count = 0; + for (uint64_t i = 0; i < numValues; ++i) { + if (notNull[i]) { + ++count; + if (enableBloomFilter) { + bloomFilter->addLong(tags[i]); + } + } + } + colIndexStatistics->increase(count); + if (count < numValues) { + colIndexStatistics->setHasNull(true); + } + } + } + } + + void UnionColumnWriter::flush(std::vector<proto::Stream>& streams) { + ColumnWriter::flush(streams); + + proto::Stream stream; + stream.set_kind(proto::Stream_Kind_DATA); + stream.set_column(static_cast<uint32_t>(columnId)); + stream.set_length(rleEncoder->flush()); + streams.push_back(stream); + + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->flush(streams); + } + } + + void UnionColumnWriter::writeIndex(std::vector<proto::Stream> &streams) const { + ColumnWriter::writeIndex(streams); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->writeIndex(streams); + } + } + + uint64_t UnionColumnWriter::getEstimatedSize() const { + uint64_t size = ColumnWriter::getEstimatedSize(); + size += rleEncoder->getBufferSize(); + for (uint32_t i = 0; i < children.size(); ++i) { + size += children[i]->getEstimatedSize(); + } + return size; + } + + void UnionColumnWriter::getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const { + proto::ColumnEncoding encoding; + encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); + encoding.set_dictionarysize(0); + if (enableBloomFilter) { + encoding.set_bloomencoding(BloomFilterVersion::UTF8); + } + encodings.push_back(encoding); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getColumnEncoding(encodings); + } + } + + void UnionColumnWriter::getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getStripeStatistics(stats); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getStripeStatistics(stats); + } + } + + void UnionColumnWriter::mergeStripeStatsIntoFileStats() { + ColumnWriter::mergeStripeStatsIntoFileStats(); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->mergeStripeStatsIntoFileStats(); + } + } + + void UnionColumnWriter::getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const { + ColumnWriter::getFileStatistics(stats); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->getFileStatistics(stats); + } + } + + void UnionColumnWriter::mergeRowGroupStatsIntoStripeStats() { + ColumnWriter::mergeRowGroupStatsIntoStripeStats(); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->mergeRowGroupStatsIntoStripeStats(); + } + } + + void UnionColumnWriter::createRowIndexEntry() { + ColumnWriter::createRowIndexEntry(); + for (uint32_t i = 0; i < children.size(); ++i) { + 
children[i]->createRowIndexEntry(); + } + } + + void UnionColumnWriter::recordPosition() const { + ColumnWriter::recordPosition(); + rleEncoder->recordPosition(rowIndexPosition.get()); + } + + void UnionColumnWriter::reset() { + ColumnWriter::reset(); + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->reset(); + } + } + + void UnionColumnWriter::writeDictionary() { + for (uint32_t i = 0; i < children.size(); ++i) { + children[i]->writeDictionary(); + } + } + + std::unique_ptr<ColumnWriter> buildWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options) { + switch (static_cast<int64_t>(type.getKind())) { + case STRUCT: + return std::unique_ptr<ColumnWriter>( + new StructColumnWriter( + type, + factory, + options)); + case INT: + case LONG: + case SHORT: + return std::unique_ptr<ColumnWriter>( + new IntegerColumnWriter( + type, + factory, + options)); + case BYTE: + return std::unique_ptr<ColumnWriter>( + new ByteColumnWriter( + type, + factory, + options)); + case BOOLEAN: + return std::unique_ptr<ColumnWriter>( + new BooleanColumnWriter( + type, + factory, + options)); + case DOUBLE: + return std::unique_ptr<ColumnWriter>( + new DoubleColumnWriter( + type, + factory, + options, + false)); + case FLOAT: + return std::unique_ptr<ColumnWriter>( + new DoubleColumnWriter( + type, + factory, + options, + true)); + case BINARY: + return std::unique_ptr<ColumnWriter>( + new BinaryColumnWriter( + type, + factory, + options)); + case STRING: + return std::unique_ptr<ColumnWriter>( + new StringColumnWriter( + type, + factory, + options)); + case CHAR: + return std::unique_ptr<ColumnWriter>( + new CharColumnWriter( + type, + factory, + options)); + case VARCHAR: + return std::unique_ptr<ColumnWriter>( + new VarCharColumnWriter( + type, + factory, + options)); + case DATE: + return std::unique_ptr<ColumnWriter>( + new DateColumnWriter( + type, + factory, + options)); + case TIMESTAMP: + return std::unique_ptr<ColumnWriter>( + new TimestampColumnWriter( + type, + factory, + options)); + case DECIMAL: + if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_64) { + return std::unique_ptr<ColumnWriter>( + new Decimal64ColumnWriter( + type, + factory, + options)); + } else if (type.getPrecision() <= Decimal64ColumnWriter::MAX_PRECISION_128) { + return std::unique_ptr<ColumnWriter>( + new Decimal128ColumnWriter( + type, + factory, + options)); + } else { + throw NotImplementedYet("Decimal precision more than 38 is not " + "supported"); + } + case LIST: + return std::unique_ptr<ColumnWriter>( + new ListColumnWriter( + type, + factory, + options)); + case MAP: + return std::unique_ptr<ColumnWriter>( + new MapColumnWriter( + type, + factory, + options)); + case UNION: + return std::unique_ptr<ColumnWriter>( + new UnionColumnWriter( + type, + factory, + options)); + default: + throw NotImplementedYet("Type is not supported yet for creating " + "ColumnWriter."); + } + } +} diff --git a/contrib/libs/apache/orc/c++/src/ColumnWriter.hh b/contrib/libs/apache/orc/c++/src/ColumnWriter.hh index 4d7d71cb37..cbbb5d00dc 100644 --- a/contrib/libs/apache/orc/c++/src/ColumnWriter.hh +++ b/contrib/libs/apache/orc/c++/src/ColumnWriter.hh @@ -1,221 +1,221 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_COLUMN_WRITER_HH -#define ORC_COLUMN_WRITER_HH - -#include "orc/Vector.hh" - -#include "BloomFilter.hh" -#include "ByteRLE.hh" -#include "Compression.hh" -#include "orc/Exceptions.hh" -#include "Statistics.hh" - -#include "wrap/orc-proto-wrapper.hh" - -namespace orc { - - class StreamsFactory { - public: - virtual ~StreamsFactory(); - - /** - * Get the stream for the given column/kind in this stripe. - * @param kind the kind of the stream - * @return the buffered output stream - */ - virtual std::unique_ptr<BufferedOutputStream> - createStream(proto::Stream_Kind kind) const = 0; - }; - - std::unique_ptr<StreamsFactory> createStreamsFactory( - const WriterOptions& options, - OutputStream * outStream); - - /** - * record stream positions for row index - */ - class RowIndexPositionRecorder : public PositionRecorder { - public: - virtual ~RowIndexPositionRecorder() override; - - RowIndexPositionRecorder(proto::RowIndexEntry& entry): - rowIndexEntry(entry) {} - - virtual void add(uint64_t pos) override { - rowIndexEntry.add_positions(pos); - } - - private: - proto::RowIndexEntry& rowIndexEntry; - }; - - /** - * The interface for writing ORC data types. - */ - class ColumnWriter { - protected: - std::unique_ptr<ByteRleEncoder> notNullEncoder; - uint64_t columnId; - std::unique_ptr<MutableColumnStatistics> colIndexStatistics; - std::unique_ptr<MutableColumnStatistics> colStripeStatistics; - std::unique_ptr<MutableColumnStatistics> colFileStatistics; - - bool enableIndex; - // row index for this column, contains all RowIndexEntries in 1 stripe - std::unique_ptr<proto::RowIndex> rowIndex; - std::unique_ptr<proto::RowIndexEntry> rowIndexEntry; - std::unique_ptr<RowIndexPositionRecorder> rowIndexPosition; - - // bloom filters are recorded per row group - bool enableBloomFilter; - std::unique_ptr<BloomFilterImpl> bloomFilter; - std::unique_ptr<proto::BloomFilterIndex> bloomFilterIndex; - - public: - ColumnWriter(const Type& type, const StreamsFactory& factory, - const WriterOptions& options); - - virtual ~ColumnWriter(); - - /** - * Write the next group of values from this rowBatch. - * @param rowBatch the row batch data to write - * @param offset the starting point of row batch to write - * @param numValues the number of values to write - * @param incomingMask if null, all values are not null. Otherwise, it is - * a mask (with at least numValues bytes) for which - * values to write. - */ - virtual void add(ColumnVectorBatch& rowBatch, - uint64_t offset, - uint64_t numValues, - const char * incomingMask); - /** - * Flush column writer output streams. - * @param streams vector to store streams generated by flush() - */ - virtual void flush(std::vector<proto::Stream>& streams); - - /** - * Get estimated size of buffer used. - * @return estimated size of buffer used - */ - virtual uint64_t getEstimatedSize() const; - - /** - * Get the encoding used by the writer for this column. 
- * @param encodings vector to store the returned ColumnEncoding info - */ - virtual void getColumnEncoding( - std::vector<proto::ColumnEncoding>& encodings) const = 0; - - /** - * Get the stripe statistics for this column. - * @param stats vector to store the returned stripe statistics - */ - virtual void getStripeStatistics( - std::vector<proto::ColumnStatistics>& stats) const; - - /** - * Get the file statistics for this column. - * @param stats vector to store the returned file statistics - */ - virtual void getFileStatistics( - std::vector<proto::ColumnStatistics>& stats) const; - - /** - * Merge index stats into stripe stats and reset index stats. - */ - virtual void mergeRowGroupStatsIntoStripeStats(); - - /** - * Merge stripe stats into file stats and reset stripe stats. - */ - virtual void mergeStripeStatsIntoFileStats(); - - /** - * Create a row index entry with the previous location and the current - * index statistics. Also merges the index statistics into the stripe - * statistics before they are cleared. Finally, it records the start of the - * next index and ensures all of the children columns also create an entry. - */ - virtual void createRowIndexEntry(); - - /** - * Create a new BloomFilter entry and add the previous one to BloomFilterIndex - */ - virtual void addBloomFilterEntry(); - - /** - * Write row index streams for this column. - * @param streams output list of ROW_INDEX streams - */ - virtual void writeIndex(std::vector<proto::Stream> &streams) const; - - /** - * Record positions for index. - * - * This function is called by createRowIndexEntry() and ColumnWriter's - * constructor. So base classes do not need to call inherited classes' - * recordPosition() function. - */ - virtual void recordPosition() const; - - /** - * Reset positions for index. - */ - virtual void reset(); - - /** - * Write dictionary to streams for string columns - */ - virtual void writeDictionary(); - - protected: - /** - * Utility function to translate ColumnStatistics into protobuf form and - * add it to output list. - * @param statsList output list for protobuf stats - * @param stats ColumnStatistics to be transformed and added - */ - void getProtoBufStatistics( - std::vector<proto::ColumnStatistics>& statsList, - const MutableColumnStatistics* stats) const { - proto::ColumnStatistics pbStats; - stats->toProtoBuf(pbStats); - statsList.push_back(pbStats); - } - - protected: - MemoryPool& memPool; - std::unique_ptr<BufferedOutputStream> indexStream; - std::unique_ptr<BufferedOutputStream> bloomFilterStream; - }; - - /** - * Create a writer for the given type. - */ - std::unique_ptr<ColumnWriter> buildWriter( - const Type& type, - const StreamsFactory& factory, - const WriterOptions& options); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_COLUMN_WRITER_HH +#define ORC_COLUMN_WRITER_HH + +#include "orc/Vector.hh" + +#include "BloomFilter.hh" +#include "ByteRLE.hh" +#include "Compression.hh" +#include "orc/Exceptions.hh" +#include "Statistics.hh" + +#include "wrap/orc-proto-wrapper.hh" + +namespace orc { + + class StreamsFactory { + public: + virtual ~StreamsFactory(); + + /** + * Get the stream for the given column/kind in this stripe. + * @param kind the kind of the stream + * @return the buffered output stream + */ + virtual std::unique_ptr<BufferedOutputStream> + createStream(proto::Stream_Kind kind) const = 0; + }; + + std::unique_ptr<StreamsFactory> createStreamsFactory( + const WriterOptions& options, + OutputStream * outStream); + + /** + * record stream positions for row index + */ + class RowIndexPositionRecorder : public PositionRecorder { + public: + virtual ~RowIndexPositionRecorder() override; + + RowIndexPositionRecorder(proto::RowIndexEntry& entry): + rowIndexEntry(entry) {} + + virtual void add(uint64_t pos) override { + rowIndexEntry.add_positions(pos); + } + + private: + proto::RowIndexEntry& rowIndexEntry; + }; + + /** + * The interface for writing ORC data types. + */ + class ColumnWriter { + protected: + std::unique_ptr<ByteRleEncoder> notNullEncoder; + uint64_t columnId; + std::unique_ptr<MutableColumnStatistics> colIndexStatistics; + std::unique_ptr<MutableColumnStatistics> colStripeStatistics; + std::unique_ptr<MutableColumnStatistics> colFileStatistics; + + bool enableIndex; + // row index for this column, contains all RowIndexEntries in 1 stripe + std::unique_ptr<proto::RowIndex> rowIndex; + std::unique_ptr<proto::RowIndexEntry> rowIndexEntry; + std::unique_ptr<RowIndexPositionRecorder> rowIndexPosition; + + // bloom filters are recorded per row group + bool enableBloomFilter; + std::unique_ptr<BloomFilterImpl> bloomFilter; + std::unique_ptr<proto::BloomFilterIndex> bloomFilterIndex; + + public: + ColumnWriter(const Type& type, const StreamsFactory& factory, + const WriterOptions& options); + + virtual ~ColumnWriter(); + + /** + * Write the next group of values from this rowBatch. + * @param rowBatch the row batch data to write + * @param offset the starting point of row batch to write + * @param numValues the number of values to write + * @param incomingMask if null, all values are not null. Otherwise, it is + * a mask (with at least numValues bytes) for which + * values to write. + */ + virtual void add(ColumnVectorBatch& rowBatch, + uint64_t offset, + uint64_t numValues, + const char * incomingMask); + /** + * Flush column writer output streams. + * @param streams vector to store streams generated by flush() + */ + virtual void flush(std::vector<proto::Stream>& streams); + + /** + * Get estimated size of buffer used. + * @return estimated size of buffer used + */ + virtual uint64_t getEstimatedSize() const; + + /** + * Get the encoding used by the writer for this column. + * @param encodings vector to store the returned ColumnEncoding info + */ + virtual void getColumnEncoding( + std::vector<proto::ColumnEncoding>& encodings) const = 0; + + /** + * Get the stripe statistics for this column. + * @param stats vector to store the returned stripe statistics + */ + virtual void getStripeStatistics( + std::vector<proto::ColumnStatistics>& stats) const; + + /** + * Get the file statistics for this column. 
+ * @param stats vector to store the returned file statistics + */ + virtual void getFileStatistics( + std::vector<proto::ColumnStatistics>& stats) const; + + /** + * Merge index stats into stripe stats and reset index stats. + */ + virtual void mergeRowGroupStatsIntoStripeStats(); + + /** + * Merge stripe stats into file stats and reset stripe stats. + */ + virtual void mergeStripeStatsIntoFileStats(); + + /** + * Create a row index entry with the previous location and the current + * index statistics. Also merges the index statistics into the stripe + * statistics before they are cleared. Finally, it records the start of the + * next index and ensures all of the children columns also create an entry. + */ + virtual void createRowIndexEntry(); + + /** + * Create a new BloomFilter entry and add the previous one to BloomFilterIndex + */ + virtual void addBloomFilterEntry(); + + /** + * Write row index streams for this column. + * @param streams output list of ROW_INDEX streams + */ + virtual void writeIndex(std::vector<proto::Stream> &streams) const; + + /** + * Record positions for index. + * + * This function is called by createRowIndexEntry() and ColumnWriter's + * constructor. So base classes do not need to call inherited classes' + * recordPosition() function. + */ + virtual void recordPosition() const; + + /** + * Reset positions for index. + */ + virtual void reset(); + + /** + * Write dictionary to streams for string columns + */ + virtual void writeDictionary(); + + protected: + /** + * Utility function to translate ColumnStatistics into protobuf form and + * add it to output list. + * @param statsList output list for protobuf stats + * @param stats ColumnStatistics to be transformed and added + */ + void getProtoBufStatistics( + std::vector<proto::ColumnStatistics>& statsList, + const MutableColumnStatistics* stats) const { + proto::ColumnStatistics pbStats; + stats->toProtoBuf(pbStats); + statsList.push_back(pbStats); + } + + protected: + MemoryPool& memPool; + std::unique_ptr<BufferedOutputStream> indexStream; + std::unique_ptr<BufferedOutputStream> bloomFilterStream; + }; + + /** + * Create a writer for the given type. + */ + std::unique_ptr<ColumnWriter> buildWriter( + const Type& type, + const StreamsFactory& factory, + const WriterOptions& options); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/Common.cc b/contrib/libs/apache/orc/c++/src/Common.cc index e50f085d30..dbf073797e 100644 --- a/contrib/libs/apache/orc/c++/src/Common.cc +++ b/contrib/libs/apache/orc/c++/src/Common.cc @@ -1,75 +1,75 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/Common.hh" - -#include <sstream> - -namespace orc { - - std::string compressionKindToString(CompressionKind kind) { - switch (static_cast<int>(kind)) { - case CompressionKind_NONE: - return "none"; - case CompressionKind_ZLIB: - return "zlib"; - case CompressionKind_SNAPPY: - return "snappy"; - case CompressionKind_LZO: - return "lzo"; - case CompressionKind_LZ4: - return "lz4"; - case CompressionKind_ZSTD: - return "zstd"; - } - std::stringstream buffer; - buffer << "unknown - " << kind; - return buffer.str(); - } - - std::string writerVersionToString(WriterVersion version) { - switch (static_cast<int>(version)) { - case WriterVersion_ORIGINAL: - return "original"; - case WriterVersion_HIVE_8732: - return "HIVE-8732"; - case WriterVersion_HIVE_4243: - return "HIVE-4243"; - case WriterVersion_HIVE_12055: - return "HIVE-12055"; - case WriterVersion_HIVE_13083: - return "HIVE-13083"; - case WriterVersion_ORC_101: - return "ORC-101"; - case WriterVersion_ORC_135: - return "ORC-135"; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Common.hh" + +#include <sstream> + +namespace orc { + + std::string compressionKindToString(CompressionKind kind) { + switch (static_cast<int>(kind)) { + case CompressionKind_NONE: + return "none"; + case CompressionKind_ZLIB: + return "zlib"; + case CompressionKind_SNAPPY: + return "snappy"; + case CompressionKind_LZO: + return "lzo"; + case CompressionKind_LZ4: + return "lz4"; + case CompressionKind_ZSTD: + return "zstd"; + } + std::stringstream buffer; + buffer << "unknown - " << kind; + return buffer.str(); + } + + std::string writerVersionToString(WriterVersion version) { + switch (static_cast<int>(version)) { + case WriterVersion_ORIGINAL: + return "original"; + case WriterVersion_HIVE_8732: + return "HIVE-8732"; + case WriterVersion_HIVE_4243: + return "HIVE-4243"; + case WriterVersion_HIVE_12055: + return "HIVE-12055"; + case WriterVersion_HIVE_13083: + return "HIVE-13083"; + case WriterVersion_ORC_101: + return "ORC-101"; + case WriterVersion_ORC_135: + return "ORC-135"; case WriterVersion_ORC_517: return "ORC-517"; case WriterVersion_ORC_203: return "ORC-203"; case WriterVersion_ORC_14: return "ORC-14"; - } - std::stringstream buffer; - buffer << "future - " << version; - return buffer.str(); - } - + } + std::stringstream buffer; + buffer << "future - " << version; + return buffer.str(); + } + std::string writerIdToString(uint32_t id) { switch (id) { case ORC_JAVA_WRITER: @@ -90,59 +90,59 @@ namespace orc { } } - std::string streamKindToString(StreamKind kind) { - switch (static_cast<int>(kind)) { - case StreamKind_PRESENT: - return "present"; - case StreamKind_DATA: - return "data"; - case StreamKind_LENGTH: - return "length"; - case StreamKind_DICTIONARY_DATA: - return "dictionary"; - case StreamKind_DICTIONARY_COUNT: - return "dictionary count"; - case StreamKind_SECONDARY: - return "secondary"; - case StreamKind_ROW_INDEX: - return "index"; - case StreamKind_BLOOM_FILTER: - return "bloom"; - } - std::stringstream buffer; - buffer << "unknown - " << kind; - return buffer.str(); - } - - std::string columnEncodingKindToString(ColumnEncodingKind kind) { - switch (static_cast<int>(kind)) { - case ColumnEncodingKind_DIRECT: - return "direct"; - case ColumnEncodingKind_DICTIONARY: - return "dictionary"; - case ColumnEncodingKind_DIRECT_V2: - return "direct rle2"; - case ColumnEncodingKind_DICTIONARY_V2: - return "dictionary rle2"; - } - std::stringstream buffer; - buffer << "unknown - " << kind; - return buffer.str(); - } - - std::string FileVersion::toString() const { - std::stringstream ss; - ss << getMajor() << '.' 
<< getMinor(); - return ss.str(); - } - - const FileVersion& FileVersion::v_0_11(){ - static FileVersion version(0,11); - return version; - } - - const FileVersion& FileVersion::v_0_12(){ - static FileVersion version(0,12); - return version; - } -} + std::string streamKindToString(StreamKind kind) { + switch (static_cast<int>(kind)) { + case StreamKind_PRESENT: + return "present"; + case StreamKind_DATA: + return "data"; + case StreamKind_LENGTH: + return "length"; + case StreamKind_DICTIONARY_DATA: + return "dictionary"; + case StreamKind_DICTIONARY_COUNT: + return "dictionary count"; + case StreamKind_SECONDARY: + return "secondary"; + case StreamKind_ROW_INDEX: + return "index"; + case StreamKind_BLOOM_FILTER: + return "bloom"; + } + std::stringstream buffer; + buffer << "unknown - " << kind; + return buffer.str(); + } + + std::string columnEncodingKindToString(ColumnEncodingKind kind) { + switch (static_cast<int>(kind)) { + case ColumnEncodingKind_DIRECT: + return "direct"; + case ColumnEncodingKind_DICTIONARY: + return "dictionary"; + case ColumnEncodingKind_DIRECT_V2: + return "direct rle2"; + case ColumnEncodingKind_DICTIONARY_V2: + return "dictionary rle2"; + } + std::stringstream buffer; + buffer << "unknown - " << kind; + return buffer.str(); + } + + std::string FileVersion::toString() const { + std::stringstream ss; + ss << getMajor() << '.' << getMinor(); + return ss.str(); + } + + const FileVersion& FileVersion::v_0_11(){ + static FileVersion version(0,11); + return version; + } + + const FileVersion& FileVersion::v_0_12(){ + static FileVersion version(0,12); + return version; + } +} diff --git a/contrib/libs/apache/orc/c++/src/Compression.cc b/contrib/libs/apache/orc/c++/src/Compression.cc index 057641ec1f..4278ed7aae 100644 --- a/contrib/libs/apache/orc/c++/src/Compression.cc +++ b/contrib/libs/apache/orc/c++/src/Compression.cc @@ -1,1071 +1,1071 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Adaptor.hh" -#include "Compression.hh" -#include "orc/Exceptions.hh" -#include "LzoDecompressor.hh" -#include "lz4.h" - -#include <algorithm> -#include <iomanip> -#include <iostream> -#include <sstream> - -#include "zlib.h" -#include "zstd.h" - -#include "wrap/snappy-wrapper.h" - -#ifndef ZSTD_CLEVEL_DEFAULT -#define ZSTD_CLEVEL_DEFAULT 3 -#endif - -namespace orc { - - class CompressionStreamBase: public BufferedOutputStream { - public: - CompressionStreamBase(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool); - - virtual bool Next(void** data, int*size) override = 0; - virtual void BackUp(int count) override; - - virtual std::string getName() const override = 0; - virtual uint64_t flush() override; - - virtual bool isCompressed() const override { return true; } - virtual uint64_t getSize() const override; - - protected: - void writeHeader(char * buffer, size_t compressedSize, bool original) { - buffer[0] = static_cast<char>((compressedSize << 1) + (original ? 1 : 0)); - buffer[1] = static_cast<char>(compressedSize >> 7); - buffer[2] = static_cast<char>(compressedSize >> 15); - } - - // ensure enough room for compression block header - void ensureHeader(); - - // Buffer to hold uncompressed data until user calls Next() - DataBuffer<unsigned char> rawInputBuffer; - - // Compress level - int level; - - // Compressed data output buffer - char * outputBuffer; - - // Size for compressionBuffer - int bufferSize; - - // Compress output position - int outputPosition; - - // Compress output buffer size - int outputSize; - }; - - CompressionStreamBase::CompressionStreamBase(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool) : - BufferedOutputStream(pool, - outStream, - capacity, - blockSize), - rawInputBuffer(pool, blockSize), - level(compressionLevel), - outputBuffer(nullptr), - bufferSize(0), - outputPosition(0), - outputSize(0) { - // PASS - } - - void CompressionStreamBase::BackUp(int count) { - if (count > bufferSize) { - throw std::logic_error("Can't backup that much!"); - } - bufferSize -= count; - } - - uint64_t CompressionStreamBase::flush() { - void * data; - int size; - if (!Next(&data, &size)) { - throw std::runtime_error("Failed to flush compression buffer."); - } - BufferedOutputStream::BackUp(outputSize - outputPosition); - bufferSize = outputSize = outputPosition = 0; - return BufferedOutputStream::flush(); - } - - uint64_t CompressionStreamBase::getSize() const { - return BufferedOutputStream::getSize() - - static_cast<uint64_t>(outputSize - outputPosition); - } - - void CompressionStreamBase::ensureHeader() { - // adjust 3 bytes for the compression header - if (outputPosition + 3 >= outputSize) { - int newPosition = outputPosition + 3 - outputSize; - if (!BufferedOutputStream::Next( - reinterpret_cast<void **>(&outputBuffer), - &outputSize)) { - throw std::runtime_error( - "Failed to get next output buffer from output stream."); - } - outputPosition = newPosition; - } else { - outputPosition += 3; - } - } - - /** - * Streaming compression base class - */ - class CompressionStream: public CompressionStreamBase { - public: - CompressionStream(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool); - - virtual bool Next(void** data, int*size) override; - virtual std::string getName() const override = 0; - - protected: - // return total compressed size - virtual uint64_t 
doStreamingCompression() = 0; - }; - - CompressionStream::CompressionStream(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool) : - CompressionStreamBase(outStream, - compressionLevel, - capacity, - blockSize, - pool) { - // PASS - } - - bool CompressionStream::Next(void** data, int*size) { - if (bufferSize != 0) { - ensureHeader(); - - uint64_t totalCompressedSize = doStreamingCompression(); - - char * header = outputBuffer + outputPosition - totalCompressedSize - 3; - if (totalCompressedSize >= static_cast<unsigned long>(bufferSize)) { - writeHeader(header, static_cast<size_t>(bufferSize), true); - memcpy( - header + 3, - rawInputBuffer.data(), - static_cast<size_t>(bufferSize)); - - int backup = static_cast<int>(totalCompressedSize) - bufferSize; - BufferedOutputStream::BackUp(backup); - outputPosition -= backup; - outputSize -= backup; - } else { - writeHeader(header, totalCompressedSize, false); - } - } - - *data = rawInputBuffer.data(); - *size = static_cast<int>(rawInputBuffer.size()); - bufferSize = *size; - - return true; - } - - class ZlibCompressionStream: public CompressionStream { - public: - ZlibCompressionStream(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool); - - virtual ~ZlibCompressionStream() override { - end(); - } - - virtual std::string getName() const override; - - protected: - virtual uint64_t doStreamingCompression() override; - - private: - void init(); - void end(); - z_stream strm; - }; - - ZlibCompressionStream::ZlibCompressionStream( - OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool) - : CompressionStream(outStream, - compressionLevel, - capacity, - blockSize, - pool) { - init(); - } - - uint64_t ZlibCompressionStream::doStreamingCompression() { - if (deflateReset(&strm) != Z_OK) { - throw std::runtime_error("Failed to reset inflate."); - } - - strm.avail_in = static_cast<unsigned int>(bufferSize); - strm.next_in = rawInputBuffer.data(); - - do { - if (outputPosition >= outputSize) { - if (!BufferedOutputStream::Next( - reinterpret_cast<void **>(&outputBuffer), - &outputSize)) { - throw std::runtime_error( - "Failed to get next output buffer from output stream."); - } - outputPosition = 0; - } - strm.next_out = reinterpret_cast<unsigned char *> - (outputBuffer + outputPosition); - strm.avail_out = static_cast<unsigned int> - (outputSize - outputPosition); - - int ret = deflate(&strm, Z_FINISH); - outputPosition = outputSize - static_cast<int>(strm.avail_out); - - if (ret == Z_STREAM_END) { - break; - } else if (ret == Z_OK) { - // needs more buffer so will continue the loop - } else { - throw std::runtime_error("Failed to deflate input data."); - } - } while (strm.avail_out == 0); - - return strm.total_out; - } - - std::string ZlibCompressionStream::getName() const { - return "ZlibCompressionStream"; - } - -DIAGNOSTIC_PUSH - -#if defined(__GNUC__) || defined(__clang__) - DIAGNOSTIC_IGNORE("-Wold-style-cast") -#endif - - void ZlibCompressionStream::init() { - strm.zalloc = nullptr; - strm.zfree = nullptr; - strm.opaque = nullptr; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "orc/Exceptions.hh" +#include "LzoDecompressor.hh" +#include "lz4.h" + +#include <algorithm> +#include <iomanip> +#include <iostream> +#include <sstream> + +#include "zlib.h" +#include "zstd.h" + +#include "wrap/snappy-wrapper.h" + +#ifndef ZSTD_CLEVEL_DEFAULT +#define ZSTD_CLEVEL_DEFAULT 3 +#endif + +namespace orc { + + class CompressionStreamBase: public BufferedOutputStream { + public: + CompressionStreamBase(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool); + + virtual bool Next(void** data, int*size) override = 0; + virtual void BackUp(int count) override; + + virtual std::string getName() const override = 0; + virtual uint64_t flush() override; + + virtual bool isCompressed() const override { return true; } + virtual uint64_t getSize() const override; + + protected: + void writeHeader(char * buffer, size_t compressedSize, bool original) { + buffer[0] = static_cast<char>((compressedSize << 1) + (original ? 1 : 0)); + buffer[1] = static_cast<char>(compressedSize >> 7); + buffer[2] = static_cast<char>(compressedSize >> 15); + } + + // ensure enough room for compression block header + void ensureHeader(); + + // Buffer to hold uncompressed data until user calls Next() + DataBuffer<unsigned char> rawInputBuffer; + + // Compress level + int level; + + // Compressed data output buffer + char * outputBuffer; + + // Size for compressionBuffer + int bufferSize; + + // Compress output position + int outputPosition; + + // Compress output buffer size + int outputSize; + }; + + CompressionStreamBase::CompressionStreamBase(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) : + BufferedOutputStream(pool, + outStream, + capacity, + blockSize), + rawInputBuffer(pool, blockSize), + level(compressionLevel), + outputBuffer(nullptr), + bufferSize(0), + outputPosition(0), + outputSize(0) { + // PASS + } + + void CompressionStreamBase::BackUp(int count) { + if (count > bufferSize) { + throw std::logic_error("Can't backup that much!"); + } + bufferSize -= count; + } + + uint64_t CompressionStreamBase::flush() { + void * data; + int size; + if (!Next(&data, &size)) { + throw std::runtime_error("Failed to flush compression buffer."); + } + BufferedOutputStream::BackUp(outputSize - outputPosition); + bufferSize = outputSize = outputPosition = 0; + return BufferedOutputStream::flush(); + } + + uint64_t CompressionStreamBase::getSize() const { + return BufferedOutputStream::getSize() - + static_cast<uint64_t>(outputSize - outputPosition); + } + + void CompressionStreamBase::ensureHeader() { + // adjust 3 bytes for the compression header + if (outputPosition + 3 >= outputSize) { + int newPosition = outputPosition + 3 - outputSize; + if (!BufferedOutputStream::Next( + reinterpret_cast<void **>(&outputBuffer), + &outputSize)) { + throw 
std::runtime_error( + "Failed to get next output buffer from output stream."); + } + outputPosition = newPosition; + } else { + outputPosition += 3; + } + } + + /** + * Streaming compression base class + */ + class CompressionStream: public CompressionStreamBase { + public: + CompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool); + + virtual bool Next(void** data, int*size) override; + virtual std::string getName() const override = 0; + + protected: + // return total compressed size + virtual uint64_t doStreamingCompression() = 0; + }; + + CompressionStream::CompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) : + CompressionStreamBase(outStream, + compressionLevel, + capacity, + blockSize, + pool) { + // PASS + } + + bool CompressionStream::Next(void** data, int*size) { + if (bufferSize != 0) { + ensureHeader(); + + uint64_t totalCompressedSize = doStreamingCompression(); + + char * header = outputBuffer + outputPosition - totalCompressedSize - 3; + if (totalCompressedSize >= static_cast<unsigned long>(bufferSize)) { + writeHeader(header, static_cast<size_t>(bufferSize), true); + memcpy( + header + 3, + rawInputBuffer.data(), + static_cast<size_t>(bufferSize)); + + int backup = static_cast<int>(totalCompressedSize) - bufferSize; + BufferedOutputStream::BackUp(backup); + outputPosition -= backup; + outputSize -= backup; + } else { + writeHeader(header, totalCompressedSize, false); + } + } + + *data = rawInputBuffer.data(); + *size = static_cast<int>(rawInputBuffer.size()); + bufferSize = *size; + + return true; + } + + class ZlibCompressionStream: public CompressionStream { + public: + ZlibCompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool); + + virtual ~ZlibCompressionStream() override { + end(); + } + + virtual std::string getName() const override; + + protected: + virtual uint64_t doStreamingCompression() override; + + private: + void init(); + void end(); + z_stream strm; + }; + + ZlibCompressionStream::ZlibCompressionStream( + OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) + : CompressionStream(outStream, + compressionLevel, + capacity, + blockSize, + pool) { + init(); + } + + uint64_t ZlibCompressionStream::doStreamingCompression() { + if (deflateReset(&strm) != Z_OK) { + throw std::runtime_error("Failed to reset inflate."); + } + + strm.avail_in = static_cast<unsigned int>(bufferSize); + strm.next_in = rawInputBuffer.data(); + + do { + if (outputPosition >= outputSize) { + if (!BufferedOutputStream::Next( + reinterpret_cast<void **>(&outputBuffer), + &outputSize)) { + throw std::runtime_error( + "Failed to get next output buffer from output stream."); + } + outputPosition = 0; + } + strm.next_out = reinterpret_cast<unsigned char *> + (outputBuffer + outputPosition); + strm.avail_out = static_cast<unsigned int> + (outputSize - outputPosition); + + int ret = deflate(&strm, Z_FINISH); + outputPosition = outputSize - static_cast<int>(strm.avail_out); + + if (ret == Z_STREAM_END) { + break; + } else if (ret == Z_OK) { + // needs more buffer so will continue the loop + } else { + throw std::runtime_error("Failed to deflate input data."); + } + } while (strm.avail_out == 0); + + return strm.total_out; + } + + std::string ZlibCompressionStream::getName() const { + return "ZlibCompressionStream"; 
+ } + +DIAGNOSTIC_PUSH + +#if defined(__GNUC__) || defined(__clang__) + DIAGNOSTIC_IGNORE("-Wold-style-cast") +#endif + + void ZlibCompressionStream::init() { + strm.zalloc = nullptr; + strm.zfree = nullptr; + strm.opaque = nullptr; strm.next_in = nullptr; - - if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) - != Z_OK) { - throw std::runtime_error("Error while calling deflateInit2() for zlib."); - } - } - - void ZlibCompressionStream::end() { - (void)deflateEnd(&strm); - } - -DIAGNOSTIC_PUSH - - enum DecompressState { DECOMPRESS_HEADER, - DECOMPRESS_START, - DECOMPRESS_CONTINUE, - DECOMPRESS_ORIGINAL, - DECOMPRESS_EOF}; - - class ZlibDecompressionStream: public SeekableInputStream { - public: - ZlibDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool); - virtual ~ZlibDecompressionStream() override; - virtual bool Next(const void** data, int*size) override; - virtual void BackUp(int count) override; - virtual bool Skip(int count) override; - virtual int64_t ByteCount() const override; - virtual void seek(PositionProvider& position) override; - virtual std::string getName() const override; - - private: - void readBuffer(bool failOnEof) { - int length; - if (!input->Next(reinterpret_cast<const void**>(&inputBuffer), - &length)) { - if (failOnEof) { - throw ParseError("Read past EOF in " - "ZlibDecompressionStream::readBuffer"); - } - state = DECOMPRESS_EOF; - inputBuffer = nullptr; - inputBufferEnd = nullptr; - } else { - inputBufferEnd = inputBuffer + length; - } - } - - uint32_t readByte(bool failOnEof) { - if (inputBuffer == inputBufferEnd) { - readBuffer(failOnEof); - if (state == DECOMPRESS_EOF) { - return 0; - } - } - return static_cast<unsigned char>(*(inputBuffer++)); - } - - void readHeader() { - uint32_t header = readByte(false); - if (state != DECOMPRESS_EOF) { - header |= readByte(true) << 8; - header |= readByte(true) << 16; - if (header & 1) { - state = DECOMPRESS_ORIGINAL; - } else { - state = DECOMPRESS_START; - } - remainingLength = header >> 1; - } else { - remainingLength = 0; - } - } - - MemoryPool& pool; - const size_t blockSize; - std::unique_ptr<SeekableInputStream> input; - z_stream zstream; - DataBuffer<char> buffer; - - // the current state - DecompressState state; - - // the start of the current buffer - // This pointer is not owned by us. It is either owned by zstream or - // the underlying stream. 
- const char* outputBuffer; - // the size of the current buffer - size_t outputBufferLength; - // the size of the current chunk - size_t remainingLength; - - // the last buffer returned from the input - const char *inputBuffer; - const char *inputBufferEnd; - - // roughly the number of bytes returned - off_t bytesReturned; - }; - -DIAGNOSTIC_PUSH - -#if defined(__GNUC__) || defined(__clang__) - DIAGNOSTIC_IGNORE("-Wold-style-cast") -#endif - - ZlibDecompressionStream::ZlibDecompressionStream - (std::unique_ptr<SeekableInputStream> inStream, - size_t _blockSize, - MemoryPool& _pool - ): pool(_pool), - blockSize(_blockSize), - buffer(pool, _blockSize) { - input.reset(inStream.release()); - zstream.next_in = nullptr; - zstream.avail_in = 0; - zstream.zalloc = nullptr; - zstream.zfree = nullptr; - zstream.opaque = nullptr; - zstream.next_out = reinterpret_cast<Bytef*>(buffer.data()); - zstream.avail_out = static_cast<uInt>(blockSize); - int64_t result = inflateInit2(&zstream, -15); - switch (result) { - case Z_OK: - break; - case Z_MEM_ERROR: - throw std::logic_error("Memory error from inflateInit2"); - case Z_VERSION_ERROR: - throw std::logic_error("Version error from inflateInit2"); - case Z_STREAM_ERROR: - throw std::logic_error("Stream error from inflateInit2"); - default: - throw std::logic_error("Unknown error from inflateInit2"); - } - outputBuffer = nullptr; - outputBufferLength = 0; - remainingLength = 0; - state = DECOMPRESS_HEADER; - inputBuffer = nullptr; - inputBufferEnd = nullptr; - bytesReturned = 0; - } - -DIAGNOSTIC_POP - - ZlibDecompressionStream::~ZlibDecompressionStream() { - int64_t result = inflateEnd(&zstream); - if (result != Z_OK) { - // really can't throw in destructors - std::cout << "Error in ~ZlibDecompressionStream() " << result << "\n"; - } - } - - bool ZlibDecompressionStream::Next(const void** data, int*size) { - // if the user pushed back, return them the partial buffer - if (outputBufferLength) { - *data = outputBuffer; - *size = static_cast<int>(outputBufferLength); - outputBuffer += outputBufferLength; - outputBufferLength = 0; - return true; - } - if (state == DECOMPRESS_HEADER || remainingLength == 0) { - readHeader(); - } - if (state == DECOMPRESS_EOF) { - return false; - } - if (inputBuffer == inputBufferEnd) { - readBuffer(true); - } - size_t availSize = - std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), - remainingLength); - if (state == DECOMPRESS_ORIGINAL) { - *data = inputBuffer; - *size = static_cast<int>(availSize); - outputBuffer = inputBuffer + availSize; - outputBufferLength = 0; - } else if (state == DECOMPRESS_START) { - zstream.next_in = - reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer)); - zstream.avail_in = static_cast<uInt>(availSize); - outputBuffer = buffer.data(); - zstream.next_out = - reinterpret_cast<Bytef*>(const_cast<char*>(outputBuffer)); - zstream.avail_out = static_cast<uInt>(blockSize); - if (inflateReset(&zstream) != Z_OK) { - throw std::logic_error("Bad inflateReset in " - "ZlibDecompressionStream::Next"); - } - int64_t result; - do { - result = inflate(&zstream, availSize == remainingLength ? 
Z_FINISH : - Z_SYNC_FLUSH); - switch (result) { - case Z_OK: - remainingLength -= availSize; - inputBuffer += availSize; - readBuffer(true); - availSize = - std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), - remainingLength); - zstream.next_in = - reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer)); - zstream.avail_in = static_cast<uInt>(availSize); - break; - case Z_STREAM_END: - break; - case Z_BUF_ERROR: - throw std::logic_error("Buffer error in " - "ZlibDecompressionStream::Next"); - case Z_DATA_ERROR: - throw std::logic_error("Data error in " - "ZlibDecompressionStream::Next"); - case Z_STREAM_ERROR: - throw std::logic_error("Stream error in " - "ZlibDecompressionStream::Next"); - default: - throw std::logic_error("Unknown error in " - "ZlibDecompressionStream::Next"); - } - } while (result != Z_STREAM_END); - *size = static_cast<int>(blockSize - zstream.avail_out); - *data = outputBuffer; - outputBufferLength = 0; - outputBuffer += *size; - } else { - throw std::logic_error("Unknown compression state in " - "ZlibDecompressionStream::Next"); - } - inputBuffer += availSize; - remainingLength -= availSize; - bytesReturned += *size; - return true; - } - - void ZlibDecompressionStream::BackUp(int count) { - if (outputBuffer == nullptr || outputBufferLength != 0) { - throw std::logic_error("Backup without previous Next in " - "ZlibDecompressionStream"); - } - outputBuffer -= static_cast<size_t>(count); - outputBufferLength = static_cast<size_t>(count); - bytesReturned -= count; - } - - bool ZlibDecompressionStream::Skip(int count) { - bytesReturned += count; - // this is a stupid implementation for now. - // should skip entire blocks without decompressing - while (count > 0) { - const void *ptr; - int len; - if (!Next(&ptr, &len)) { - return false; - } - if (len > count) { - BackUp(len - count); - count = 0; - } else { - count -= len; - } - } - return true; - } - - int64_t ZlibDecompressionStream::ByteCount() const { - return bytesReturned; - } - - void ZlibDecompressionStream::seek(PositionProvider& position) { - // clear state to force seek to read from the right position - state = DECOMPRESS_HEADER; - outputBuffer = nullptr; - outputBufferLength = 0; - remainingLength = 0; - inputBuffer = nullptr; - inputBufferEnd = nullptr; - - input->seek(position); - bytesReturned = static_cast<off_t>(input->ByteCount()); - if (!Skip(static_cast<int>(position.next()))) { - throw ParseError("Bad skip in ZlibDecompressionStream::seek"); - } - } - - std::string ZlibDecompressionStream::getName() const { - std::ostringstream result; - result << "zlib(" << input->getName() << ")"; - return result.str(); - } - - class BlockDecompressionStream: public SeekableInputStream { - public: - BlockDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool); - - virtual ~BlockDecompressionStream() override {} - virtual bool Next(const void** data, int*size) override; - virtual void BackUp(int count) override; - virtual bool Skip(int count) override; - virtual int64_t ByteCount() const override; - virtual void seek(PositionProvider& position) override; - virtual std::string getName() const override = 0; - - protected: - virtual uint64_t decompress(const char *input, uint64_t length, - char *output, size_t maxOutputLength) = 0; - - std::string getStreamName() const { - return input->getName(); - } - - private: - void readBuffer(bool failOnEof) { - int length; - if (!input->Next(reinterpret_cast<const void**>(&inputBufferPtr), - &length)) { - if (failOnEof) 
{ - throw ParseError(getName() + "read past EOF"); - } - state = DECOMPRESS_EOF; - inputBufferPtr = nullptr; - inputBufferPtrEnd = nullptr; - } else { - inputBufferPtrEnd = inputBufferPtr + length; - } - } - - uint32_t readByte(bool failOnEof) { - if (inputBufferPtr == inputBufferPtrEnd) { - readBuffer(failOnEof); - if (state == DECOMPRESS_EOF) { - return 0; - } - } - return static_cast<unsigned char>(*(inputBufferPtr++)); - } - - void readHeader() { - uint32_t header = readByte(false); - if (state != DECOMPRESS_EOF) { - header |= readByte(true) << 8; - header |= readByte(true) << 16; - if (header & 1) { - state = DECOMPRESS_ORIGINAL; - } else { - state = DECOMPRESS_START; - } - remainingLength = header >> 1; - } else { - remainingLength = 0; - } - } - - std::unique_ptr<SeekableInputStream> input; - MemoryPool& pool; - - // may need to stitch together multiple input buffers; - // to give snappy a contiguous block - DataBuffer<char> inputBuffer; - - // uncompressed output - DataBuffer<char> outputBuffer; - - // the current state - DecompressState state; - - // the start of the current output buffer - const char* outputBufferPtr; - // the size of the current output buffer - size_t outputBufferLength; - - // the size of the current chunk - size_t remainingLength; - - // the last buffer returned from the input - const char *inputBufferPtr; - const char *inputBufferPtrEnd; - - // bytes returned by this stream - off_t bytesReturned; - }; - - BlockDecompressionStream::BlockDecompressionStream - (std::unique_ptr<SeekableInputStream> inStream, - size_t bufferSize, - MemoryPool& _pool - ) : pool(_pool), - inputBuffer(pool, bufferSize), - outputBuffer(pool, bufferSize), - state(DECOMPRESS_HEADER), - outputBufferPtr(nullptr), - outputBufferLength(0), - remainingLength(0), - inputBufferPtr(nullptr), - inputBufferPtrEnd(nullptr), - bytesReturned(0) { - input.reset(inStream.release()); - } - - bool BlockDecompressionStream::Next(const void** data, int*size) { - // if the user pushed back, return them the partial buffer - if (outputBufferLength) { - *data = outputBufferPtr; - *size = static_cast<int>(outputBufferLength); - outputBufferPtr += outputBufferLength; - bytesReturned += static_cast<off_t>(outputBufferLength); - outputBufferLength = 0; - return true; - } - if (state == DECOMPRESS_HEADER || remainingLength == 0) { - readHeader(); - } - if (state == DECOMPRESS_EOF) { - return false; - } - if (inputBufferPtr == inputBufferPtrEnd) { - readBuffer(true); - } - - size_t availSize = - std::min(static_cast<size_t>(inputBufferPtrEnd - inputBufferPtr), - remainingLength); - if (state == DECOMPRESS_ORIGINAL) { - *data = inputBufferPtr; - *size = static_cast<int>(availSize); - outputBufferPtr = inputBufferPtr + availSize; - outputBufferLength = 0; - inputBufferPtr += availSize; - remainingLength -= availSize; - } else if (state == DECOMPRESS_START) { - // Get contiguous bytes of compressed block. - const char *compressed = inputBufferPtr; - if (remainingLength == availSize) { - inputBufferPtr += availSize; - } else { - // Did not read enough from input. 
- if (inputBuffer.capacity() < remainingLength) { - inputBuffer.resize(remainingLength); - } - ::memcpy(inputBuffer.data(), inputBufferPtr, availSize); - inputBufferPtr += availSize; - compressed = inputBuffer.data(); - - for (size_t pos = availSize; pos < remainingLength; ) { - readBuffer(true); - size_t avail = - std::min(static_cast<size_t>(inputBufferPtrEnd - - inputBufferPtr), - remainingLength - pos); - ::memcpy(inputBuffer.data() + pos, inputBufferPtr, avail); - pos += avail; - inputBufferPtr += avail; - } - } - - outputBufferLength = decompress(compressed, remainingLength, - outputBuffer.data(), - outputBuffer.capacity()); - - remainingLength = 0; - state = DECOMPRESS_HEADER; - *data = outputBuffer.data(); - *size = static_cast<int>(outputBufferLength); - outputBufferPtr = outputBuffer.data() + outputBufferLength; - outputBufferLength = 0; - } - - bytesReturned += *size; - return true; - } - - void BlockDecompressionStream::BackUp(int count) { - if (outputBufferPtr == nullptr || outputBufferLength != 0) { - throw std::logic_error("Backup without previous Next in "+getName()); - } - outputBufferPtr -= static_cast<size_t>(count); - outputBufferLength = static_cast<size_t>(count); - bytesReturned -= count; - } - - bool BlockDecompressionStream::Skip(int count) { - bytesReturned += count; - // this is a stupid implementation for now. - // should skip entire blocks without decompressing - while (count > 0) { - const void *ptr; - int len; - if (!Next(&ptr, &len)) { - return false; - } - if (len > count) { - BackUp(len - count); - count = 0; - } else { - count -= len; - } - } - return true; - } - - int64_t BlockDecompressionStream::ByteCount() const { - return bytesReturned; - } - - void BlockDecompressionStream::seek(PositionProvider& position) { - // clear state to force seek to read from the right position - state = DECOMPRESS_HEADER; - outputBufferPtr = nullptr; - outputBufferLength = 0; - remainingLength = 0; - inputBufferPtr = nullptr; - inputBufferPtrEnd = nullptr; - - input->seek(position); - if (!Skip(static_cast<int>(position.next()))) { - throw ParseError("Bad skip in " + getName()); - } - } - - class SnappyDecompressionStream: public BlockDecompressionStream { - public: - SnappyDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool - ): BlockDecompressionStream - (std::move(inStream), - blockSize, - pool) { - // PASS - } - - std::string getName() const override { - std::ostringstream result; - result << "snappy(" << getStreamName() << ")"; - return result.str(); - } - - protected: - virtual uint64_t decompress(const char *input, uint64_t length, - char *output, size_t maxOutputLength - ) override; - }; - - uint64_t SnappyDecompressionStream::decompress(const char *input, - uint64_t length, - char *output, - size_t maxOutputLength) { - size_t outLength; - if (!snappy::GetUncompressedLength(input, length, &outLength)) { - throw ParseError("SnappyDecompressionStream choked on corrupt input"); - } - - if (outLength > maxOutputLength) { - throw std::logic_error("Snappy length exceeds block size"); - } - - if (!snappy::RawUncompress(input, length, output)) { - throw ParseError("SnappyDecompressionStream choked on corrupt input"); - } - return outLength; - } - - class LzoDecompressionStream: public BlockDecompressionStream { - public: - LzoDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool - ): BlockDecompressionStream - (std::move(inStream), - blockSize, - pool) { - // PASS - } 
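The writeHeader()/readHeader() pair in the streams above frames every compression chunk with a 3-byte little-endian header: bit 0 flags an "original" (stored uncompressed) chunk and the remaining 23 bits carry the chunk length, which is why a single chunk can never exceed 2^23 - 1 bytes. A minimal standalone sketch of that framing, kept outside the ORC classes (encodeChunkHeader and decodeChunkHeader are illustrative names, not library API):

#include <cassert>
#include <cstdint>

// Pack the length and the "original" flag into the 3-byte chunk header.
static void encodeChunkHeader(unsigned char* buf, uint32_t length, bool original) {
  uint32_t header = (length << 1) | (original ? 1u : 0u);
  buf[0] = static_cast<unsigned char>(header);
  buf[1] = static_cast<unsigned char>(header >> 8);
  buf[2] = static_cast<unsigned char>(header >> 16);
}

// Reverse of encodeChunkHeader: recover the chunk length and the flag.
static void decodeChunkHeader(const unsigned char* buf, uint32_t* length, bool* original) {
  uint32_t header = static_cast<uint32_t>(buf[0]) |
                    (static_cast<uint32_t>(buf[1]) << 8) |
                    (static_cast<uint32_t>(buf[2]) << 16);
  *original = (header & 1u) != 0;
  *length = header >> 1;
}

int main() {
  unsigned char buf[3];
  encodeChunkHeader(buf, 100000, false);
  uint32_t length = 0;
  bool original = true;
  decodeChunkHeader(buf, &length, &original);
  assert(length == 100000 && !original);
  return 0;
}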
- - std::string getName() const override { - std::ostringstream result; - result << "lzo(" << getStreamName() << ")"; - return result.str(); - } - - protected: - virtual uint64_t decompress(const char *input, uint64_t length, - char *output, size_t maxOutputLength - ) override; - }; - - uint64_t LzoDecompressionStream::decompress(const char *input, - uint64_t length, - char *output, - size_t maxOutputLength) { - return lzoDecompress(input, input + length, output, - output + maxOutputLength); - } - - class Lz4DecompressionStream: public BlockDecompressionStream { - public: - Lz4DecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool - ): BlockDecompressionStream - (std::move(inStream), - blockSize, - pool) { - // PASS - } - - std::string getName() const override { - std::ostringstream result; - result << "lz4(" << getStreamName() << ")"; - return result.str(); - } - - protected: - virtual uint64_t decompress(const char *input, uint64_t length, - char *output, size_t maxOutputLength - ) override; - }; - - uint64_t Lz4DecompressionStream::decompress(const char *input, - uint64_t length, - char *output, - size_t maxOutputLength) { - int result = LZ4_decompress_safe(input, output, static_cast<int>(length), - static_cast<int>(maxOutputLength)); - if (result < 0) { - throw ParseError(getName() + " - failed to decompress"); - } - return static_cast<uint64_t>(result); - } - - /** - * Block compression base class - */ - class BlockCompressionStream: public CompressionStreamBase { - public: - BlockCompressionStream(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool) - : CompressionStreamBase(outStream, - compressionLevel, - capacity, - blockSize, - pool) - , compressorBuffer(pool) { - // PASS - } - - virtual bool Next(void** data, int*size) override; - virtual std::string getName() const override = 0; - - protected: - // compresses a block and returns the compressed size - virtual uint64_t doBlockCompression() = 0; - - // return maximum possible compression size for allocating space for - // compressorBuffer below - virtual uint64_t estimateMaxCompressionSize() = 0; - - // should allocate max possible compressed size - DataBuffer<unsigned char> compressorBuffer; - }; - - bool BlockCompressionStream::Next(void** data, int*size) { - if (bufferSize != 0) { - ensureHeader(); - - // perform compression - size_t totalCompressedSize = doBlockCompression(); - - const unsigned char * dataToWrite = nullptr; - int totalSizeToWrite = 0; - char * header = outputBuffer + outputPosition - 3; - - if (totalCompressedSize >= static_cast<size_t>(bufferSize)) { - writeHeader(header, static_cast<size_t>(bufferSize), true); - dataToWrite = rawInputBuffer.data(); - totalSizeToWrite = bufferSize; - } else { - writeHeader(header, totalCompressedSize, false); - dataToWrite = compressorBuffer.data(); - totalSizeToWrite = static_cast<int>(totalCompressedSize); - } - - char * dst = header + 3; - while (totalSizeToWrite > 0) { - if (outputPosition == outputSize) { - if (!BufferedOutputStream::Next(reinterpret_cast<void **>(&outputBuffer), - &outputSize)) { - throw std::logic_error( - "Failed to get next output buffer from output stream."); - } - outputPosition = 0; - dst = outputBuffer; - } else if (outputPosition > outputSize) { - // this will unlikely happen, but we have seen a few on zstd v1.1.0 - throw std::logic_error("Write to an out-of-bound place!"); - } - - int sizeToWrite = std::min(totalSizeToWrite, outputSize 
- outputPosition); - std::memcpy(dst, dataToWrite, static_cast<size_t>(sizeToWrite)); - - outputPosition += sizeToWrite; - dataToWrite += sizeToWrite; - totalSizeToWrite -= sizeToWrite; - dst += sizeToWrite; - } - } - - *data = rawInputBuffer.data(); - *size = static_cast<int>(rawInputBuffer.size()); - bufferSize = *size; - compressorBuffer.resize(estimateMaxCompressionSize()); - - return true; - } - - /** - * ZSTD block compression - */ - class ZSTDCompressionStream: public BlockCompressionStream { - public: - ZSTDCompressionStream(OutputStream * outStream, - int compressionLevel, - uint64_t capacity, - uint64_t blockSize, - MemoryPool& pool) - : BlockCompressionStream(outStream, - compressionLevel, - capacity, - blockSize, - pool) { + + if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) + != Z_OK) { + throw std::runtime_error("Error while calling deflateInit2() for zlib."); + } + } + + void ZlibCompressionStream::end() { + (void)deflateEnd(&strm); + } + +DIAGNOSTIC_PUSH + + enum DecompressState { DECOMPRESS_HEADER, + DECOMPRESS_START, + DECOMPRESS_CONTINUE, + DECOMPRESS_ORIGINAL, + DECOMPRESS_EOF}; + + class ZlibDecompressionStream: public SeekableInputStream { + public: + ZlibDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool); + virtual ~ZlibDecompressionStream() override; + virtual bool Next(const void** data, int*size) override; + virtual void BackUp(int count) override; + virtual bool Skip(int count) override; + virtual int64_t ByteCount() const override; + virtual void seek(PositionProvider& position) override; + virtual std::string getName() const override; + + private: + void readBuffer(bool failOnEof) { + int length; + if (!input->Next(reinterpret_cast<const void**>(&inputBuffer), + &length)) { + if (failOnEof) { + throw ParseError("Read past EOF in " + "ZlibDecompressionStream::readBuffer"); + } + state = DECOMPRESS_EOF; + inputBuffer = nullptr; + inputBufferEnd = nullptr; + } else { + inputBufferEnd = inputBuffer + length; + } + } + + uint32_t readByte(bool failOnEof) { + if (inputBuffer == inputBufferEnd) { + readBuffer(failOnEof); + if (state == DECOMPRESS_EOF) { + return 0; + } + } + return static_cast<unsigned char>(*(inputBuffer++)); + } + + void readHeader() { + uint32_t header = readByte(false); + if (state != DECOMPRESS_EOF) { + header |= readByte(true) << 8; + header |= readByte(true) << 16; + if (header & 1) { + state = DECOMPRESS_ORIGINAL; + } else { + state = DECOMPRESS_START; + } + remainingLength = header >> 1; + } else { + remainingLength = 0; + } + } + + MemoryPool& pool; + const size_t blockSize; + std::unique_ptr<SeekableInputStream> input; + z_stream zstream; + DataBuffer<char> buffer; + + // the current state + DecompressState state; + + // the start of the current buffer + // This pointer is not owned by us. It is either owned by zstream or + // the underlying stream. 
+ const char* outputBuffer; + // the size of the current buffer + size_t outputBufferLength; + // the size of the current chunk + size_t remainingLength; + + // the last buffer returned from the input + const char *inputBuffer; + const char *inputBufferEnd; + + // roughly the number of bytes returned + off_t bytesReturned; + }; + +DIAGNOSTIC_PUSH + +#if defined(__GNUC__) || defined(__clang__) + DIAGNOSTIC_IGNORE("-Wold-style-cast") +#endif + + ZlibDecompressionStream::ZlibDecompressionStream + (std::unique_ptr<SeekableInputStream> inStream, + size_t _blockSize, + MemoryPool& _pool + ): pool(_pool), + blockSize(_blockSize), + buffer(pool, _blockSize) { + input.reset(inStream.release()); + zstream.next_in = nullptr; + zstream.avail_in = 0; + zstream.zalloc = nullptr; + zstream.zfree = nullptr; + zstream.opaque = nullptr; + zstream.next_out = reinterpret_cast<Bytef*>(buffer.data()); + zstream.avail_out = static_cast<uInt>(blockSize); + int64_t result = inflateInit2(&zstream, -15); + switch (result) { + case Z_OK: + break; + case Z_MEM_ERROR: + throw std::logic_error("Memory error from inflateInit2"); + case Z_VERSION_ERROR: + throw std::logic_error("Version error from inflateInit2"); + case Z_STREAM_ERROR: + throw std::logic_error("Stream error from inflateInit2"); + default: + throw std::logic_error("Unknown error from inflateInit2"); + } + outputBuffer = nullptr; + outputBufferLength = 0; + remainingLength = 0; + state = DECOMPRESS_HEADER; + inputBuffer = nullptr; + inputBufferEnd = nullptr; + bytesReturned = 0; + } + +DIAGNOSTIC_POP + + ZlibDecompressionStream::~ZlibDecompressionStream() { + int64_t result = inflateEnd(&zstream); + if (result != Z_OK) { + // really can't throw in destructors + std::cout << "Error in ~ZlibDecompressionStream() " << result << "\n"; + } + } + + bool ZlibDecompressionStream::Next(const void** data, int*size) { + // if the user pushed back, return them the partial buffer + if (outputBufferLength) { + *data = outputBuffer; + *size = static_cast<int>(outputBufferLength); + outputBuffer += outputBufferLength; + outputBufferLength = 0; + return true; + } + if (state == DECOMPRESS_HEADER || remainingLength == 0) { + readHeader(); + } + if (state == DECOMPRESS_EOF) { + return false; + } + if (inputBuffer == inputBufferEnd) { + readBuffer(true); + } + size_t availSize = + std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), + remainingLength); + if (state == DECOMPRESS_ORIGINAL) { + *data = inputBuffer; + *size = static_cast<int>(availSize); + outputBuffer = inputBuffer + availSize; + outputBufferLength = 0; + } else if (state == DECOMPRESS_START) { + zstream.next_in = + reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer)); + zstream.avail_in = static_cast<uInt>(availSize); + outputBuffer = buffer.data(); + zstream.next_out = + reinterpret_cast<Bytef*>(const_cast<char*>(outputBuffer)); + zstream.avail_out = static_cast<uInt>(blockSize); + if (inflateReset(&zstream) != Z_OK) { + throw std::logic_error("Bad inflateReset in " + "ZlibDecompressionStream::Next"); + } + int64_t result; + do { + result = inflate(&zstream, availSize == remainingLength ? 
Z_FINISH : + Z_SYNC_FLUSH); + switch (result) { + case Z_OK: + remainingLength -= availSize; + inputBuffer += availSize; + readBuffer(true); + availSize = + std::min(static_cast<size_t>(inputBufferEnd - inputBuffer), + remainingLength); + zstream.next_in = + reinterpret_cast<Bytef*>(const_cast<char*>(inputBuffer)); + zstream.avail_in = static_cast<uInt>(availSize); + break; + case Z_STREAM_END: + break; + case Z_BUF_ERROR: + throw std::logic_error("Buffer error in " + "ZlibDecompressionStream::Next"); + case Z_DATA_ERROR: + throw std::logic_error("Data error in " + "ZlibDecompressionStream::Next"); + case Z_STREAM_ERROR: + throw std::logic_error("Stream error in " + "ZlibDecompressionStream::Next"); + default: + throw std::logic_error("Unknown error in " + "ZlibDecompressionStream::Next"); + } + } while (result != Z_STREAM_END); + *size = static_cast<int>(blockSize - zstream.avail_out); + *data = outputBuffer; + outputBufferLength = 0; + outputBuffer += *size; + } else { + throw std::logic_error("Unknown compression state in " + "ZlibDecompressionStream::Next"); + } + inputBuffer += availSize; + remainingLength -= availSize; + bytesReturned += *size; + return true; + } + + void ZlibDecompressionStream::BackUp(int count) { + if (outputBuffer == nullptr || outputBufferLength != 0) { + throw std::logic_error("Backup without previous Next in " + "ZlibDecompressionStream"); + } + outputBuffer -= static_cast<size_t>(count); + outputBufferLength = static_cast<size_t>(count); + bytesReturned -= count; + } + + bool ZlibDecompressionStream::Skip(int count) { + bytesReturned += count; + // this is a stupid implementation for now. + // should skip entire blocks without decompressing + while (count > 0) { + const void *ptr; + int len; + if (!Next(&ptr, &len)) { + return false; + } + if (len > count) { + BackUp(len - count); + count = 0; + } else { + count -= len; + } + } + return true; + } + + int64_t ZlibDecompressionStream::ByteCount() const { + return bytesReturned; + } + + void ZlibDecompressionStream::seek(PositionProvider& position) { + // clear state to force seek to read from the right position + state = DECOMPRESS_HEADER; + outputBuffer = nullptr; + outputBufferLength = 0; + remainingLength = 0; + inputBuffer = nullptr; + inputBufferEnd = nullptr; + + input->seek(position); + bytesReturned = static_cast<off_t>(input->ByteCount()); + if (!Skip(static_cast<int>(position.next()))) { + throw ParseError("Bad skip in ZlibDecompressionStream::seek"); + } + } + + std::string ZlibDecompressionStream::getName() const { + std::ostringstream result; + result << "zlib(" << input->getName() << ")"; + return result.str(); + } + + class BlockDecompressionStream: public SeekableInputStream { + public: + BlockDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool); + + virtual ~BlockDecompressionStream() override {} + virtual bool Next(const void** data, int*size) override; + virtual void BackUp(int count) override; + virtual bool Skip(int count) override; + virtual int64_t ByteCount() const override; + virtual void seek(PositionProvider& position) override; + virtual std::string getName() const override = 0; + + protected: + virtual uint64_t decompress(const char *input, uint64_t length, + char *output, size_t maxOutputLength) = 0; + + std::string getStreamName() const { + return input->getName(); + } + + private: + void readBuffer(bool failOnEof) { + int length; + if (!input->Next(reinterpret_cast<const void**>(&inputBufferPtr), + &length)) { + if (failOnEof) 
{ + throw ParseError(getName() + "read past EOF"); + } + state = DECOMPRESS_EOF; + inputBufferPtr = nullptr; + inputBufferPtrEnd = nullptr; + } else { + inputBufferPtrEnd = inputBufferPtr + length; + } + } + + uint32_t readByte(bool failOnEof) { + if (inputBufferPtr == inputBufferPtrEnd) { + readBuffer(failOnEof); + if (state == DECOMPRESS_EOF) { + return 0; + } + } + return static_cast<unsigned char>(*(inputBufferPtr++)); + } + + void readHeader() { + uint32_t header = readByte(false); + if (state != DECOMPRESS_EOF) { + header |= readByte(true) << 8; + header |= readByte(true) << 16; + if (header & 1) { + state = DECOMPRESS_ORIGINAL; + } else { + state = DECOMPRESS_START; + } + remainingLength = header >> 1; + } else { + remainingLength = 0; + } + } + + std::unique_ptr<SeekableInputStream> input; + MemoryPool& pool; + + // may need to stitch together multiple input buffers; + // to give snappy a contiguous block + DataBuffer<char> inputBuffer; + + // uncompressed output + DataBuffer<char> outputBuffer; + + // the current state + DecompressState state; + + // the start of the current output buffer + const char* outputBufferPtr; + // the size of the current output buffer + size_t outputBufferLength; + + // the size of the current chunk + size_t remainingLength; + + // the last buffer returned from the input + const char *inputBufferPtr; + const char *inputBufferPtrEnd; + + // bytes returned by this stream + off_t bytesReturned; + }; + + BlockDecompressionStream::BlockDecompressionStream + (std::unique_ptr<SeekableInputStream> inStream, + size_t bufferSize, + MemoryPool& _pool + ) : pool(_pool), + inputBuffer(pool, bufferSize), + outputBuffer(pool, bufferSize), + state(DECOMPRESS_HEADER), + outputBufferPtr(nullptr), + outputBufferLength(0), + remainingLength(0), + inputBufferPtr(nullptr), + inputBufferPtrEnd(nullptr), + bytesReturned(0) { + input.reset(inStream.release()); + } + + bool BlockDecompressionStream::Next(const void** data, int*size) { + // if the user pushed back, return them the partial buffer + if (outputBufferLength) { + *data = outputBufferPtr; + *size = static_cast<int>(outputBufferLength); + outputBufferPtr += outputBufferLength; + bytesReturned += static_cast<off_t>(outputBufferLength); + outputBufferLength = 0; + return true; + } + if (state == DECOMPRESS_HEADER || remainingLength == 0) { + readHeader(); + } + if (state == DECOMPRESS_EOF) { + return false; + } + if (inputBufferPtr == inputBufferPtrEnd) { + readBuffer(true); + } + + size_t availSize = + std::min(static_cast<size_t>(inputBufferPtrEnd - inputBufferPtr), + remainingLength); + if (state == DECOMPRESS_ORIGINAL) { + *data = inputBufferPtr; + *size = static_cast<int>(availSize); + outputBufferPtr = inputBufferPtr + availSize; + outputBufferLength = 0; + inputBufferPtr += availSize; + remainingLength -= availSize; + } else if (state == DECOMPRESS_START) { + // Get contiguous bytes of compressed block. + const char *compressed = inputBufferPtr; + if (remainingLength == availSize) { + inputBufferPtr += availSize; + } else { + // Did not read enough from input. 
+ if (inputBuffer.capacity() < remainingLength) { + inputBuffer.resize(remainingLength); + } + ::memcpy(inputBuffer.data(), inputBufferPtr, availSize); + inputBufferPtr += availSize; + compressed = inputBuffer.data(); + + for (size_t pos = availSize; pos < remainingLength; ) { + readBuffer(true); + size_t avail = + std::min(static_cast<size_t>(inputBufferPtrEnd - + inputBufferPtr), + remainingLength - pos); + ::memcpy(inputBuffer.data() + pos, inputBufferPtr, avail); + pos += avail; + inputBufferPtr += avail; + } + } + + outputBufferLength = decompress(compressed, remainingLength, + outputBuffer.data(), + outputBuffer.capacity()); + + remainingLength = 0; + state = DECOMPRESS_HEADER; + *data = outputBuffer.data(); + *size = static_cast<int>(outputBufferLength); + outputBufferPtr = outputBuffer.data() + outputBufferLength; + outputBufferLength = 0; + } + + bytesReturned += *size; + return true; + } + + void BlockDecompressionStream::BackUp(int count) { + if (outputBufferPtr == nullptr || outputBufferLength != 0) { + throw std::logic_error("Backup without previous Next in "+getName()); + } + outputBufferPtr -= static_cast<size_t>(count); + outputBufferLength = static_cast<size_t>(count); + bytesReturned -= count; + } + + bool BlockDecompressionStream::Skip(int count) { + bytesReturned += count; + // this is a stupid implementation for now. + // should skip entire blocks without decompressing + while (count > 0) { + const void *ptr; + int len; + if (!Next(&ptr, &len)) { + return false; + } + if (len > count) { + BackUp(len - count); + count = 0; + } else { + count -= len; + } + } + return true; + } + + int64_t BlockDecompressionStream::ByteCount() const { + return bytesReturned; + } + + void BlockDecompressionStream::seek(PositionProvider& position) { + // clear state to force seek to read from the right position + state = DECOMPRESS_HEADER; + outputBufferPtr = nullptr; + outputBufferLength = 0; + remainingLength = 0; + inputBufferPtr = nullptr; + inputBufferPtrEnd = nullptr; + + input->seek(position); + if (!Skip(static_cast<int>(position.next()))) { + throw ParseError("Bad skip in " + getName()); + } + } + + class SnappyDecompressionStream: public BlockDecompressionStream { + public: + SnappyDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool + ): BlockDecompressionStream + (std::move(inStream), + blockSize, + pool) { + // PASS + } + + std::string getName() const override { + std::ostringstream result; + result << "snappy(" << getStreamName() << ")"; + return result.str(); + } + + protected: + virtual uint64_t decompress(const char *input, uint64_t length, + char *output, size_t maxOutputLength + ) override; + }; + + uint64_t SnappyDecompressionStream::decompress(const char *input, + uint64_t length, + char *output, + size_t maxOutputLength) { + size_t outLength; + if (!snappy::GetUncompressedLength(input, length, &outLength)) { + throw ParseError("SnappyDecompressionStream choked on corrupt input"); + } + + if (outLength > maxOutputLength) { + throw std::logic_error("Snappy length exceeds block size"); + } + + if (!snappy::RawUncompress(input, length, output)) { + throw ParseError("SnappyDecompressionStream choked on corrupt input"); + } + return outLength; + } + + class LzoDecompressionStream: public BlockDecompressionStream { + public: + LzoDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool + ): BlockDecompressionStream + (std::move(inStream), + blockSize, + pool) { + // PASS + } 
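All of the decompression streams above expose the same zero-copy pull interface as SeekableInputStream: Next() hands out whatever chunk is currently available and BackUp() returns the unused tail, which is also how the Skip() implementations above are built. A minimal sketch of a caller reading an exact byte count through that interface (StreamLike and readFully are illustrative stand-ins under that assumption, not part of the ORC API):

#include <algorithm>
#include <cstring>
#include <stdexcept>

// Mirrors the Next()/BackUp() shape of the streams shown above.
struct StreamLike {
  virtual ~StreamLike() {}
  virtual bool Next(const void** data, int* size) = 0;
  virtual void BackUp(int count) = 0;
};

// Copy exactly `length` bytes into `dst`, returning the unused remainder of
// the last chunk to the stream so the next caller sees it again.
void readFully(StreamLike& stream, char* dst, int length) {
  while (length > 0) {
    const void* chunk;
    int chunkSize;
    if (!stream.Next(&chunk, &chunkSize)) {
      throw std::runtime_error("unexpected end of stream");
    }
    int used = std::min(length, chunkSize);
    std::memcpy(dst, chunk, static_cast<size_t>(used));
    dst += used;
    length -= used;
    if (used < chunkSize) {
      stream.BackUp(chunkSize - used);
    }
  }
}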
+ + std::string getName() const override { + std::ostringstream result; + result << "lzo(" << getStreamName() << ")"; + return result.str(); + } + + protected: + virtual uint64_t decompress(const char *input, uint64_t length, + char *output, size_t maxOutputLength + ) override; + }; + + uint64_t LzoDecompressionStream::decompress(const char *input, + uint64_t length, + char *output, + size_t maxOutputLength) { + return lzoDecompress(input, input + length, output, + output + maxOutputLength); + } + + class Lz4DecompressionStream: public BlockDecompressionStream { + public: + Lz4DecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool + ): BlockDecompressionStream + (std::move(inStream), + blockSize, + pool) { + // PASS + } + + std::string getName() const override { + std::ostringstream result; + result << "lz4(" << getStreamName() << ")"; + return result.str(); + } + + protected: + virtual uint64_t decompress(const char *input, uint64_t length, + char *output, size_t maxOutputLength + ) override; + }; + + uint64_t Lz4DecompressionStream::decompress(const char *input, + uint64_t length, + char *output, + size_t maxOutputLength) { + int result = LZ4_decompress_safe(input, output, static_cast<int>(length), + static_cast<int>(maxOutputLength)); + if (result < 0) { + throw ParseError(getName() + " - failed to decompress"); + } + return static_cast<uint64_t>(result); + } + + /** + * Block compression base class + */ + class BlockCompressionStream: public CompressionStreamBase { + public: + BlockCompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) + : CompressionStreamBase(outStream, + compressionLevel, + capacity, + blockSize, + pool) + , compressorBuffer(pool) { + // PASS + } + + virtual bool Next(void** data, int*size) override; + virtual std::string getName() const override = 0; + + protected: + // compresses a block and returns the compressed size + virtual uint64_t doBlockCompression() = 0; + + // return maximum possible compression size for allocating space for + // compressorBuffer below + virtual uint64_t estimateMaxCompressionSize() = 0; + + // should allocate max possible compressed size + DataBuffer<unsigned char> compressorBuffer; + }; + + bool BlockCompressionStream::Next(void** data, int*size) { + if (bufferSize != 0) { + ensureHeader(); + + // perform compression + size_t totalCompressedSize = doBlockCompression(); + + const unsigned char * dataToWrite = nullptr; + int totalSizeToWrite = 0; + char * header = outputBuffer + outputPosition - 3; + + if (totalCompressedSize >= static_cast<size_t>(bufferSize)) { + writeHeader(header, static_cast<size_t>(bufferSize), true); + dataToWrite = rawInputBuffer.data(); + totalSizeToWrite = bufferSize; + } else { + writeHeader(header, totalCompressedSize, false); + dataToWrite = compressorBuffer.data(); + totalSizeToWrite = static_cast<int>(totalCompressedSize); + } + + char * dst = header + 3; + while (totalSizeToWrite > 0) { + if (outputPosition == outputSize) { + if (!BufferedOutputStream::Next(reinterpret_cast<void **>(&outputBuffer), + &outputSize)) { + throw std::logic_error( + "Failed to get next output buffer from output stream."); + } + outputPosition = 0; + dst = outputBuffer; + } else if (outputPosition > outputSize) { + // this will unlikely happen, but we have seen a few on zstd v1.1.0 + throw std::logic_error("Write to an out-of-bound place!"); + } + + int sizeToWrite = std::min(totalSizeToWrite, outputSize 
- outputPosition); + std::memcpy(dst, dataToWrite, static_cast<size_t>(sizeToWrite)); + + outputPosition += sizeToWrite; + dataToWrite += sizeToWrite; + totalSizeToWrite -= sizeToWrite; + dst += sizeToWrite; + } + } + + *data = rawInputBuffer.data(); + *size = static_cast<int>(rawInputBuffer.size()); + bufferSize = *size; + compressorBuffer.resize(estimateMaxCompressionSize()); + + return true; + } + + /** + * ZSTD block compression + */ + class ZSTDCompressionStream: public BlockCompressionStream { + public: + ZSTDCompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) + : BlockCompressionStream(outStream, + compressionLevel, + capacity, + blockSize, + pool) { this->init(); - } - - virtual std::string getName() const override { - return "ZstdCompressionStream"; - } + } + + virtual std::string getName() const override { + return "ZstdCompressionStream"; + } virtual ~ZSTDCompressionStream() override { this->end(); } - - protected: - virtual uint64_t doBlockCompression() override; - - virtual uint64_t estimateMaxCompressionSize() override { - return ZSTD_compressBound(static_cast<size_t>(bufferSize)); - } + + protected: + virtual uint64_t doBlockCompression() override; + + virtual uint64_t estimateMaxCompressionSize() override { + return ZSTD_compressBound(static_cast<size_t>(bufferSize)); + } private: void init(); void end(); ZSTD_CCtx *cctx; - }; - - uint64_t ZSTDCompressionStream::doBlockCompression() { + }; + + uint64_t ZSTDCompressionStream::doBlockCompression() { return ZSTD_compressCCtx(cctx, compressorBuffer.data(), compressorBuffer.size(), rawInputBuffer.data(), static_cast<size_t>(bufferSize), level); - } + } DIAGNOSTIC_PUSH - + #if defined(__GNUC__) || defined(__clang__) DIAGNOSTIC_IGNORE("-Wold-style-cast") #endif @@ -1086,53 +1086,53 @@ DIAGNOSTIC_PUSH DIAGNOSTIC_PUSH - /** - * ZSTD block decompression - */ - class ZSTDDecompressionStream: public BlockDecompressionStream { - public: - ZSTDDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, - size_t blockSize, - MemoryPool& pool) - : BlockDecompressionStream(std::move(inStream), - blockSize, - pool) { + /** + * ZSTD block decompression + */ + class ZSTDDecompressionStream: public BlockDecompressionStream { + public: + ZSTDDecompressionStream(std::unique_ptr<SeekableInputStream> inStream, + size_t blockSize, + MemoryPool& pool) + : BlockDecompressionStream(std::move(inStream), + blockSize, + pool) { this->init(); - } - + } + virtual ~ZSTDDecompressionStream() override { this->end(); } - std::string getName() const override { - std::ostringstream result; - result << "zstd(" << getStreamName() << ")"; - return result.str(); - } - - protected: - virtual uint64_t decompress(const char *input, - uint64_t length, - char *output, - size_t maxOutputLength) override; + std::string getName() const override { + std::ostringstream result; + result << "zstd(" << getStreamName() << ")"; + return result.str(); + } + + protected: + virtual uint64_t decompress(const char *input, + uint64_t length, + char *output, + size_t maxOutputLength) override; private: void init(); void end(); ZSTD_DCtx *dctx; - }; - - uint64_t ZSTDDecompressionStream::decompress(const char *input, - uint64_t length, - char *output, - size_t maxOutputLength) { + }; + + uint64_t ZSTDDecompressionStream::decompress(const char *input, + uint64_t length, + char *output, + size_t maxOutputLength) { return static_cast<uint64_t>(ZSTD_decompressDCtx(dctx, output, maxOutputLength, input, 
length)); - } - + } + DIAGNOSTIC_PUSH #if defined(__GNUC__) || defined(__clang__) @@ -1155,71 +1155,71 @@ DIAGNOSTIC_PUSH DIAGNOSTIC_PUSH - std::unique_ptr<BufferedOutputStream> - createCompressor( - CompressionKind kind, - OutputStream * outStream, - CompressionStrategy strategy, - uint64_t bufferCapacity, - uint64_t compressionBlockSize, - MemoryPool& pool) { - switch (static_cast<int64_t>(kind)) { - case CompressionKind_NONE: { - return std::unique_ptr<BufferedOutputStream> - (new BufferedOutputStream( - pool, outStream, bufferCapacity, compressionBlockSize)); - } - case CompressionKind_ZLIB: { - int level = (strategy == CompressionStrategy_SPEED) ? - Z_BEST_SPEED + 1 : Z_DEFAULT_COMPRESSION; - return std::unique_ptr<BufferedOutputStream> - (new ZlibCompressionStream( - outStream, level, bufferCapacity, compressionBlockSize, pool)); - } - case CompressionKind_ZSTD: { - int level = (strategy == CompressionStrategy_SPEED) ? - 1 : ZSTD_CLEVEL_DEFAULT; - return std::unique_ptr<BufferedOutputStream> - (new ZSTDCompressionStream( - outStream, level, bufferCapacity, compressionBlockSize, pool)); - } - case CompressionKind_SNAPPY: - case CompressionKind_LZO: - case CompressionKind_LZ4: - default: - throw NotImplementedYet("compression codec"); - } - } - - std::unique_ptr<SeekableInputStream> - createDecompressor(CompressionKind kind, - std::unique_ptr<SeekableInputStream> input, - uint64_t blockSize, - MemoryPool& pool) { - switch (static_cast<int64_t>(kind)) { - case CompressionKind_NONE: - return REDUNDANT_MOVE(input); - case CompressionKind_ZLIB: - return std::unique_ptr<SeekableInputStream> - (new ZlibDecompressionStream(std::move(input), blockSize, pool)); - case CompressionKind_SNAPPY: - return std::unique_ptr<SeekableInputStream> - (new SnappyDecompressionStream(std::move(input), blockSize, pool)); - case CompressionKind_LZO: - return std::unique_ptr<SeekableInputStream> - (new LzoDecompressionStream(std::move(input), blockSize, pool)); - case CompressionKind_LZ4: - return std::unique_ptr<SeekableInputStream> - (new Lz4DecompressionStream(std::move(input), blockSize, pool)); - case CompressionKind_ZSTD: - return std::unique_ptr<SeekableInputStream> - (new ZSTDDecompressionStream(std::move(input), blockSize, pool)); - default: { - std::ostringstream buffer; - buffer << "Unknown compression codec " << kind; - throw NotImplementedYet(buffer.str()); - } - } - } - -} + std::unique_ptr<BufferedOutputStream> + createCompressor( + CompressionKind kind, + OutputStream * outStream, + CompressionStrategy strategy, + uint64_t bufferCapacity, + uint64_t compressionBlockSize, + MemoryPool& pool) { + switch (static_cast<int64_t>(kind)) { + case CompressionKind_NONE: { + return std::unique_ptr<BufferedOutputStream> + (new BufferedOutputStream( + pool, outStream, bufferCapacity, compressionBlockSize)); + } + case CompressionKind_ZLIB: { + int level = (strategy == CompressionStrategy_SPEED) ? + Z_BEST_SPEED + 1 : Z_DEFAULT_COMPRESSION; + return std::unique_ptr<BufferedOutputStream> + (new ZlibCompressionStream( + outStream, level, bufferCapacity, compressionBlockSize, pool)); + } + case CompressionKind_ZSTD: { + int level = (strategy == CompressionStrategy_SPEED) ? 
+ 1 : ZSTD_CLEVEL_DEFAULT; + return std::unique_ptr<BufferedOutputStream> + (new ZSTDCompressionStream( + outStream, level, bufferCapacity, compressionBlockSize, pool)); + } + case CompressionKind_SNAPPY: + case CompressionKind_LZO: + case CompressionKind_LZ4: + default: + throw NotImplementedYet("compression codec"); + } + } + + std::unique_ptr<SeekableInputStream> + createDecompressor(CompressionKind kind, + std::unique_ptr<SeekableInputStream> input, + uint64_t blockSize, + MemoryPool& pool) { + switch (static_cast<int64_t>(kind)) { + case CompressionKind_NONE: + return REDUNDANT_MOVE(input); + case CompressionKind_ZLIB: + return std::unique_ptr<SeekableInputStream> + (new ZlibDecompressionStream(std::move(input), blockSize, pool)); + case CompressionKind_SNAPPY: + return std::unique_ptr<SeekableInputStream> + (new SnappyDecompressionStream(std::move(input), blockSize, pool)); + case CompressionKind_LZO: + return std::unique_ptr<SeekableInputStream> + (new LzoDecompressionStream(std::move(input), blockSize, pool)); + case CompressionKind_LZ4: + return std::unique_ptr<SeekableInputStream> + (new Lz4DecompressionStream(std::move(input), blockSize, pool)); + case CompressionKind_ZSTD: + return std::unique_ptr<SeekableInputStream> + (new ZSTDDecompressionStream(std::move(input), blockSize, pool)); + default: { + std::ostringstream buffer; + buffer << "Unknown compression codec " << kind; + throw NotImplementedYet(buffer.str()); + } + } + } + +} diff --git a/contrib/libs/apache/orc/c++/src/Compression.hh b/contrib/libs/apache/orc/c++/src/Compression.hh index 84e85bddaf..ff79377d83 100644 --- a/contrib/libs/apache/orc/c++/src/Compression.hh +++ b/contrib/libs/apache/orc/c++/src/Compression.hh @@ -1,58 +1,58 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_COMPRESSION_HH -#define ORC_COMPRESSION_HH - -#include "io/InputStream.hh" -#include "io/OutputStream.hh" - -namespace orc { - - /** - * Create a decompressor for the given compression kind. - * @param kind the compression type to implement - * @param input the input stream that is the underlying source - * @param bufferSize the maximum size of the buffer - * @param pool the memory pool - */ - std::unique_ptr<SeekableInputStream> - createDecompressor(CompressionKind kind, - std::unique_ptr<SeekableInputStream> input, - uint64_t bufferSize, - MemoryPool& pool); - - /** - * Create a compressor for the given compression kind. 
- * @param kind the compression type to implement - * @param outStream the output stream that is the underlying target - * @param strategy compression strategy - * @param bufferCapacity compression stream buffer total capacity - * @param compressionBlockSize compression buffer block size - * @param pool the memory pool - */ - std::unique_ptr<BufferedOutputStream> - createCompressor(CompressionKind kind, - OutputStream * outStream, - CompressionStrategy strategy, - uint64_t bufferCapacity, - uint64_t compressionBlockSize, - MemoryPool& pool); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_COMPRESSION_HH +#define ORC_COMPRESSION_HH + +#include "io/InputStream.hh" +#include "io/OutputStream.hh" + +namespace orc { + + /** + * Create a decompressor for the given compression kind. + * @param kind the compression type to implement + * @param input the input stream that is the underlying source + * @param bufferSize the maximum size of the buffer + * @param pool the memory pool + */ + std::unique_ptr<SeekableInputStream> + createDecompressor(CompressionKind kind, + std::unique_ptr<SeekableInputStream> input, + uint64_t bufferSize, + MemoryPool& pool); + + /** + * Create a compressor for the given compression kind. + * @param kind the compression type to implement + * @param outStream the output stream that is the underlying target + * @param strategy compression strategy + * @param bufferCapacity compression stream buffer total capacity + * @param compressionBlockSize compression buffer block size + * @param pool the memory pool + */ + std::unique_ptr<BufferedOutputStream> + createCompressor(CompressionKind kind, + OutputStream * outStream, + CompressionStrategy strategy, + uint64_t bufferCapacity, + uint64_t compressionBlockSize, + MemoryPool& pool); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/Exceptions.cc b/contrib/libs/apache/orc/c++/src/Exceptions.cc index f721c05a88..2077b27df4 100644 --- a/contrib/libs/apache/orc/c++/src/Exceptions.cc +++ b/contrib/libs/apache/orc/c++/src/Exceptions.cc @@ -1,78 +1,78 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Exceptions.hh" - -namespace orc { - - NotImplementedYet::NotImplementedYet(const std::string& what_arg - ) : logic_error(what_arg) { - // PASS - } - - NotImplementedYet::NotImplementedYet(const char* what_arg - ) :logic_error(what_arg) { - // PASS - } - - NotImplementedYet::NotImplementedYet(const NotImplementedYet& error - ): logic_error(error) { - // PASS - } - - NotImplementedYet::~NotImplementedYet() ORC_NOEXCEPT { - // PASS - } - - ParseError::ParseError(const std::string& what_arg - ): runtime_error(what_arg) { - // PASS - } - - ParseError::ParseError(const char* what_arg - ): runtime_error(what_arg) { - // PASS - } - - ParseError::ParseError(const ParseError& error): runtime_error(error) { - // PASS - } - - ParseError::~ParseError() ORC_NOEXCEPT { - // PASS - } - - InvalidArgument::InvalidArgument(const std::string& what_arg - ): runtime_error(what_arg) { - // PASS - } - - InvalidArgument::InvalidArgument(const char* what_arg - ): runtime_error(what_arg) { - // PASS - } - - InvalidArgument::InvalidArgument(const InvalidArgument& error - ): runtime_error(error) { - // PASS - } - - InvalidArgument::~InvalidArgument() ORC_NOEXCEPT { - // PASS - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Exceptions.hh" + +namespace orc { + + NotImplementedYet::NotImplementedYet(const std::string& what_arg + ) : logic_error(what_arg) { + // PASS + } + + NotImplementedYet::NotImplementedYet(const char* what_arg + ) :logic_error(what_arg) { + // PASS + } + + NotImplementedYet::NotImplementedYet(const NotImplementedYet& error + ): logic_error(error) { + // PASS + } + + NotImplementedYet::~NotImplementedYet() ORC_NOEXCEPT { + // PASS + } + + ParseError::ParseError(const std::string& what_arg + ): runtime_error(what_arg) { + // PASS + } + + ParseError::ParseError(const char* what_arg + ): runtime_error(what_arg) { + // PASS + } + + ParseError::ParseError(const ParseError& error): runtime_error(error) { + // PASS + } + + ParseError::~ParseError() ORC_NOEXCEPT { + // PASS + } + + InvalidArgument::InvalidArgument(const std::string& what_arg + ): runtime_error(what_arg) { + // PASS + } + + InvalidArgument::InvalidArgument(const char* what_arg + ): runtime_error(what_arg) { + // PASS + } + + InvalidArgument::InvalidArgument(const InvalidArgument& error + ): runtime_error(error) { + // PASS + } + + InvalidArgument::~InvalidArgument() ORC_NOEXCEPT { + // PASS + } +} diff --git a/contrib/libs/apache/orc/c++/src/Int128.cc b/contrib/libs/apache/orc/c++/src/Int128.cc index 96266e855c..433e6fa193 100644 --- a/contrib/libs/apache/orc/c++/src/Int128.cc +++ b/contrib/libs/apache/orc/c++/src/Int128.cc @@ -1,494 +1,494 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Int128.hh" -#include "Adaptor.hh" - -#include <algorithm> -#include <iomanip> -#include <iostream> -#include <sstream> - -namespace orc { - - Int128 Int128::maximumValue() { - return Int128(0x7fffffffffffffff, 0xfffffffffffffff); - } - - Int128 Int128::minimumValue() { - return Int128(static_cast<int64_t>(0x8000000000000000), 0x0); - } - - Int128::Int128(const std::string& str) { - lowbits = 0; - highbits = 0; - size_t length = str.length(); - if (length > 0) { - bool isNegative = str[0] == '-'; - size_t posn = isNegative ? 1 : 0; - while (posn < length) { - size_t group = std::min(static_cast<size_t>(18), length - posn); - int64_t chunk = std::stoll(str.substr(posn, group)); - int64_t multiple = 1; - for(size_t i=0; i < group; ++i) { - multiple *= 10; - } - *this *= multiple; - *this += chunk; - posn += group; - } - if (isNegative) { - negate(); - } - } - } - - Int128& Int128::operator*=(const Int128 &right) { - const uint64_t INT_MASK = 0xffffffff; - const uint64_t CARRY_BIT = INT_MASK + 1; - - // Break the left and right numbers into 32 bit chunks - // so that we can multiply them without overflow. 
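      // --- editorial sketch, not part of the original diff ---
      // A worked view of the decomposition used below, assuming the
      // highbits/lowbits layout of Int128: each operand is split into four
      // 32-bit digits, most significant first,
      //   L0 = high >> 32, L1 = high & 0xffffffff,
      //   L2 = low  >> 32, L3 = low  & 0xffffffff   (and R0..R3 likewise),
      // and only the partial products that can still land in the low 128
      // bits are accumulated:
      //   result = L3*R3
      //          + ((L2*R3 + L3*R2) << 32)
      //          + ((L1*R3 + L2*R2 + L3*R1) << 64)
      //          + ((L0*R3 + L1*R2 + L2*R1 + L3*R0) << 96)
      // Terms such as L0*R0 would only affect bits above 127, so they are
      // dropped; the CARRY_BIT bookkeeping below propagates overflow from
      // the 64-bit partial sums into highbits.
      // --- end editorial sketch ---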
- uint64_t L0 = static_cast<uint64_t>(highbits) >> 32; - uint64_t L1 = static_cast<uint64_t>(highbits) & INT_MASK; - uint64_t L2 = lowbits >> 32; - uint64_t L3 = lowbits & INT_MASK; - uint64_t R0 = static_cast<uint64_t>(right.highbits) >> 32; - uint64_t R1 = static_cast<uint64_t>(right.highbits) & INT_MASK; - uint64_t R2 = right.lowbits >> 32; - uint64_t R3 = right.lowbits & INT_MASK; - - uint64_t product = L3 * R3; - lowbits = product & INT_MASK; - uint64_t sum = product >> 32; - product = L2 * R3; - sum += product; - highbits = sum < product ? CARRY_BIT : 0; - product = L3 * R2; - sum += product; - if (sum < product) { - highbits += CARRY_BIT; - } - lowbits += sum << 32; - highbits += static_cast<int64_t>(sum >> 32); - highbits += L1 * R3 + L2 * R2 + L3 * R1; - highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; - return *this; - } - - /** - * Expands the given value into an array of ints so that we can work on - * it. The array will be converted to an absolute value and the wasNegative - * flag will be set appropriately. The array will remove leading zeros from - * the value. - * @param array an array of length 4 to set with the value - * @param wasNegative a flag for whether the value was original negative - * @result the output length of the array - */ - int64_t Int128::fillInArray(uint32_t* array, bool &wasNegative) const { - uint64_t high; - uint64_t low; - if (highbits < 0) { - low = ~lowbits + 1; - high = static_cast<uint64_t>(~highbits); - if (low == 0) { - high += 1; - } - wasNegative = true; - } else { - low = lowbits; - high = static_cast<uint64_t>(highbits); - wasNegative = false; - } - if (high != 0) { - if (high > UINT32_MAX) { - array[0] = static_cast<uint32_t>(high >> 32); - array[1] = static_cast<uint32_t>(high); - array[2] = static_cast<uint32_t>(low >> 32); - array[3] = static_cast<uint32_t>(low); - return 4; - } else { - array[0] = static_cast<uint32_t>(high); - array[1] = static_cast<uint32_t>(low >> 32); - array[2] = static_cast<uint32_t>(low); - return 3; - } - } else if (low >= UINT32_MAX) { - array[0] = static_cast<uint32_t>(low >> 32); - array[1] = static_cast<uint32_t>(low); - return 2; - } else if (low == 0) { - return 0; - } else { - array[0] = static_cast<uint32_t>(low); - return 1; - } - } - - - /** - * Find last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is - * the MSB. We can replace this with bsrq asm instruction on x64. - */ - int64_t fls(uint32_t x) { - int64_t bitpos = 0; - while (x) { - x >>= 1; - bitpos += 1; - } - return bitpos; - } - - /** - * Shift the number in the array left by bits positions. - * @param array the number to shift, must have length elements - * @param length the number of entries in the array - * @param bits the number of bits to shift (0 <= bits < 32) - */ - void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) { - if (length > 0 && bits != 0) { - for(int64_t i=0; i < length-1; ++i) { - array[i] = (array[i] << bits) | (array[i+1] >> (32 - bits)); - } - array[length-1] <<= bits; - } - } - - /** - * Shift the number in the array right by bits positions. 
- * @param array the number to shift, must have length elements - * @param length the number of entries in the array - * @param bits the number of bits to shift (0 <= bits < 32) - */ - void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) { - if (length > 0 && bits != 0) { - for(int64_t i=length-1; i > 0; --i) { - array[i] = (array[i] >> bits) | (array[i-1] << (32 - bits)); - } - array[0] >>= bits; - } - } - - /** - * Fix the signs of the result and remainder at the end of the division - * based on the signs of the dividend and divisor. - */ - void fixDivisionSigns(Int128 &result, Int128 &remainder, - bool dividendWasNegative, bool divisorWasNegative) { - if (dividendWasNegative != divisorWasNegative) { - result.negate(); - } - if (dividendWasNegative) { - remainder.negate(); - } - } - - /** - * Build a Int128 from a list of ints. - */ - void buildFromArray(Int128& value, uint32_t* array, int64_t length) { - switch (length) { - case 0: - value = 0; - break; - case 1: - value = array[0]; - break; - case 2: - value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]); - break; - case 3: - value = Int128(array[0], - (static_cast<uint64_t>(array[1]) << 32) + array[2]); - break; - case 4: - value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1], - (static_cast<uint64_t>(array[2]) << 32) + array[3]); - break; - case 5: - if (array[0] != 0) { - throw std::logic_error("Can't build Int128 with 5 ints."); - } - value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2], - (static_cast<uint64_t>(array[3]) << 32) + array[4]); - break; - default: - throw std::logic_error("Unsupported length for building Int128"); - } - } - - /** - * Do a division where the divisor fits into a single 32 bit value. - */ - Int128 singleDivide(uint32_t* dividend, int64_t dividendLength, - uint32_t divisor, Int128& remainder, - bool dividendWasNegative, bool divisorWasNegative) { - uint64_t r = 0; - uint32_t resultArray[5]; - for(int64_t j=0; j < dividendLength; j++) { - r <<= 32; - r += dividend[j]; - resultArray[j] = static_cast<uint32_t>(r / divisor); - r %= divisor; - } - Int128 result; - buildFromArray(result, resultArray, dividendLength); - remainder = static_cast<int64_t>(r); - fixDivisionSigns(result, remainder, dividendWasNegative, - divisorWasNegative); - return result; - } - - Int128 Int128::divide(const Int128 &divisor, Int128 &remainder) const { - // Split the dividend and divisor into integer pieces so that we can - // work on them. - uint32_t dividendArray[5]; - uint32_t divisorArray[4]; - bool dividendWasNegative; - bool divisorWasNegative; - // leave an extra zero before the dividend - dividendArray[0] = 0; - int64_t dividendLength = fillInArray(dividendArray + 1, dividendWasNegative)+1; - int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative); - - // Handle some of the easy cases. - if (dividendLength <= divisorLength) { - remainder = *this; - return 0; - } else if (divisorLength == 0) { - throw std::range_error("Division by 0 in Int128"); - } else if (divisorLength == 1) { - return singleDivide(dividendArray, dividendLength, divisorArray[0], - remainder, dividendWasNegative, divisorWasNegative); - } - - int64_t resultLength = dividendLength - divisorLength; - uint32_t resultArray[4]; - - // Normalize by shifting both by a multiple of 2 so that - // the digit guessing is better. The requirement is that - // divisorArray[0] is greater than 2**31. 
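      // --- editorial sketch, not part of the original diff ---
      // Worked example (with a hypothetical value) of the normalization
      // step that follows, using the fls() helper defined above, where
      // bit 1 is the LSB: if divisorArray[0] were 0x00001234, fls() returns
      // 13, so normalizeBits = 32 - 13 = 19 and the shifted top digit
      // becomes 0x1234 << 19 = 0x91a00000, which is >= 2**31 as the
      // digit-guessing loop below requires. Shifting the dividend and the
      // divisor left by the same amount leaves the quotient unchanged and
      // only scales the remainder, which is shifted back ("denormalized")
      // at the end of this function.
      // --- end editorial sketch ---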
- int64_t normalizeBits = 32 - fls(divisorArray[0]); - shiftArrayLeft(divisorArray, divisorLength, normalizeBits); - shiftArrayLeft(dividendArray, dividendLength, normalizeBits); - - // compute each digit in the result - for(int64_t j=0; j < resultLength; ++j) { - // Guess the next digit. At worst it is two too large - uint32_t guess = UINT32_MAX; - uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << 32 | - dividendArray[j+1]; - if (dividendArray[j] != divisorArray[0]) { - guess = static_cast<uint32_t>(highDividend / divisorArray[0]); - } - - // catch all of the cases where guess is two too large and most of the - // cases where it is one too large - uint32_t rhat = - static_cast<uint32_t>(highDividend - guess * - static_cast<uint64_t>(divisorArray[0])); - while (static_cast<uint64_t>(divisorArray[1]) * guess > - (static_cast<uint64_t>(rhat) << 32) + dividendArray[j+2]) { - guess -= 1; - rhat += divisorArray[0]; - if (static_cast<uint64_t>(rhat) < divisorArray[0]) { - break; - } - } - - // subtract off the guess * divisor from the dividend - uint64_t mult = 0; - for(int64_t i=divisorLength-1; i >= 0; --i) { - mult += static_cast<uint64_t>(guess) * divisorArray[i]; - uint32_t prev = dividendArray[j+i+1]; - dividendArray[j+i+1] -= static_cast<uint32_t>(mult); - mult >>= 32; - if (dividendArray[j+i+1] > prev) { - mult += 1; - } - } - uint32_t prev = dividendArray[j]; - dividendArray[j] -= static_cast<uint32_t>(mult); - - // if guess was too big, we add back divisor - if (dividendArray[j] > prev) { - guess -= 1; - uint32_t carry = 0; - for(int64_t i=divisorLength-1; i >= 0; --i) { - uint64_t sum = static_cast<uint64_t>(divisorArray[i]) + - dividendArray[j+i+1] + carry; - dividendArray[j+i+1] = static_cast<uint32_t>(sum); - carry = static_cast<uint32_t>(sum >> 32); - } - dividendArray[j] += carry; - } - - resultArray[j] = guess; - } - - // denormalize the remainder - shiftArrayRight(dividendArray, dividendLength, normalizeBits); - - // return result and remainder - Int128 result; - buildFromArray(result, resultArray, resultLength); - buildFromArray(remainder, dividendArray, dividendLength); - fixDivisionSigns(result, remainder, - dividendWasNegative, divisorWasNegative); - return result; - } - - std::string Int128::toString() const { - // 10**18 - the largest power of 10 less than 63 bits - const Int128 tenTo18(0xde0b6b3a7640000); - // 10**36 - const Int128 tenTo36(0xc097ce7bc90715, 0xb34b9f1000000000); - Int128 remainder; - std::stringstream buf; - bool needFill = false; - - // get anything above 10**36 and print it - Int128 top = divide(tenTo36, remainder); - if (top != 0) { - buf << top.toLong(); - remainder.abs(); - needFill = true; - } - - // now get anything above 10**18 and print it - Int128 tail; - top = remainder.divide(tenTo18, tail); - if (needFill || top != 0) { - if (needFill) { - buf << std::setw(18) << std::setfill('0'); - } else { - needFill = true; - tail.abs(); - } - buf << top.toLong(); - } - - // finally print the tail, which is less than 10**18 - if (needFill) { - buf << std::setw(18) << std::setfill('0'); - } - buf << tail.toLong(); - return buf.str(); - } - - std::string Int128::toDecimalString(int32_t scale) const { - std::string str = toString(); - if (scale == 0) { - return str; - } else if (*this < 0) { - int32_t len = static_cast<int32_t>(str.length()); - if (len - 1 > scale) { - return str.substr(0, static_cast<size_t>(len - scale)) + "." 
+ - str.substr(static_cast<size_t>(len - scale), - static_cast<size_t>(scale)); - } else if (len - 1 == scale) { - return "-0." + str.substr(1, std::string::npos); - } else { - std::string result = "-0."; - for(int32_t i=0; i < scale - len + 1; ++i) { - result += "0"; - } - return result + str.substr(1, std::string::npos); - } - } else { - int32_t len = static_cast<int32_t>(str.length()); - if (len > scale) { - return str.substr(0, static_cast<size_t>(len - scale)) + "." + - str.substr(static_cast<size_t>(len - scale), - static_cast<size_t>(scale)); - } else if (len == scale) { - return "0." + str; - } else { - std::string result = "0."; - for(int32_t i=0; i < scale - len; ++i) { - result += "0"; - } - return result + str; - } - } - } - - std::string Int128::toHexString() const { - std::stringstream buf; - buf << std::hex << "0x" - << std::setw(16) << std::setfill('0') << highbits - << std::setw(16) << std::setfill('0') << lowbits; - return buf.str(); - } - - const static int32_t MAX_PRECISION_64 = 18; - const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] = - {1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000, - 100000000000000000, - 1000000000000000000}; - - Int128 scaleUpInt128ByPowerOfTen(Int128 value, - int32_t power, - bool &overflow) { - overflow = false; - Int128 remainder; - - while (power > 0) { - int32_t step = std::min(power, MAX_PRECISION_64); - if (value > 0 && Int128::maximumValue().divide(POWERS_OF_TEN[step], remainder) < value) { - overflow = true; - return Int128::maximumValue(); - } else if (value < 0 && Int128::minimumValue().divide(POWERS_OF_TEN[step], remainder) > value) { - overflow = true; - return Int128::minimumValue(); - } - - value *= POWERS_OF_TEN[step]; - power -= step; - } - - return value; - } - - Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power) { - Int128 remainder; - while (power > 0) { - int32_t step = std::min(std::abs(power), MAX_PRECISION_64); - value = value.divide(POWERS_OF_TEN[step], remainder); - power -= step; - } - return value; - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Int128.hh" +#include "Adaptor.hh" + +#include <algorithm> +#include <iomanip> +#include <iostream> +#include <sstream> + +namespace orc { + + Int128 Int128::maximumValue() { + return Int128(0x7fffffffffffffff, 0xfffffffffffffff); + } + + Int128 Int128::minimumValue() { + return Int128(static_cast<int64_t>(0x8000000000000000), 0x0); + } + + Int128::Int128(const std::string& str) { + lowbits = 0; + highbits = 0; + size_t length = str.length(); + if (length > 0) { + bool isNegative = str[0] == '-'; + size_t posn = isNegative ? 1 : 0; + while (posn < length) { + size_t group = std::min(static_cast<size_t>(18), length - posn); + int64_t chunk = std::stoll(str.substr(posn, group)); + int64_t multiple = 1; + for(size_t i=0; i < group; ++i) { + multiple *= 10; + } + *this *= multiple; + *this += chunk; + posn += group; + } + if (isNegative) { + negate(); + } + } + } + + Int128& Int128::operator*=(const Int128 &right) { + const uint64_t INT_MASK = 0xffffffff; + const uint64_t CARRY_BIT = INT_MASK + 1; + + // Break the left and right numbers into 32 bit chunks + // so that we can multiply them without overflow. + uint64_t L0 = static_cast<uint64_t>(highbits) >> 32; + uint64_t L1 = static_cast<uint64_t>(highbits) & INT_MASK; + uint64_t L2 = lowbits >> 32; + uint64_t L3 = lowbits & INT_MASK; + uint64_t R0 = static_cast<uint64_t>(right.highbits) >> 32; + uint64_t R1 = static_cast<uint64_t>(right.highbits) & INT_MASK; + uint64_t R2 = right.lowbits >> 32; + uint64_t R3 = right.lowbits & INT_MASK; + + uint64_t product = L3 * R3; + lowbits = product & INT_MASK; + uint64_t sum = product >> 32; + product = L2 * R3; + sum += product; + highbits = sum < product ? CARRY_BIT : 0; + product = L3 * R2; + sum += product; + if (sum < product) { + highbits += CARRY_BIT; + } + lowbits += sum << 32; + highbits += static_cast<int64_t>(sum >> 32); + highbits += L1 * R3 + L2 * R2 + L3 * R1; + highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; + return *this; + } + + /** + * Expands the given value into an array of ints so that we can work on + * it. The array will be converted to an absolute value and the wasNegative + * flag will be set appropriately. The array will remove leading zeros from + * the value. + * @param array an array of length 4 to set with the value + * @param wasNegative a flag for whether the value was original negative + * @result the output length of the array + */ + int64_t Int128::fillInArray(uint32_t* array, bool &wasNegative) const { + uint64_t high; + uint64_t low; + if (highbits < 0) { + low = ~lowbits + 1; + high = static_cast<uint64_t>(~highbits); + if (low == 0) { + high += 1; + } + wasNegative = true; + } else { + low = lowbits; + high = static_cast<uint64_t>(highbits); + wasNegative = false; + } + if (high != 0) { + if (high > UINT32_MAX) { + array[0] = static_cast<uint32_t>(high >> 32); + array[1] = static_cast<uint32_t>(high); + array[2] = static_cast<uint32_t>(low >> 32); + array[3] = static_cast<uint32_t>(low); + return 4; + } else { + array[0] = static_cast<uint32_t>(high); + array[1] = static_cast<uint32_t>(low >> 32); + array[2] = static_cast<uint32_t>(low); + return 3; + } + } else if (low >= UINT32_MAX) { + array[0] = static_cast<uint32_t>(low >> 32); + array[1] = static_cast<uint32_t>(low); + return 2; + } else if (low == 0) { + return 0; + } else { + array[0] = static_cast<uint32_t>(low); + return 1; + } + } + + + /** + * Find last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is + * the MSB. 
We can replace this with bsrq asm instruction on x64. + */ + int64_t fls(uint32_t x) { + int64_t bitpos = 0; + while (x) { + x >>= 1; + bitpos += 1; + } + return bitpos; + } + + /** + * Shift the number in the array left by bits positions. + * @param array the number to shift, must have length elements + * @param length the number of entries in the array + * @param bits the number of bits to shift (0 <= bits < 32) + */ + void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) { + if (length > 0 && bits != 0) { + for(int64_t i=0; i < length-1; ++i) { + array[i] = (array[i] << bits) | (array[i+1] >> (32 - bits)); + } + array[length-1] <<= bits; + } + } + + /** + * Shift the number in the array right by bits positions. + * @param array the number to shift, must have length elements + * @param length the number of entries in the array + * @param bits the number of bits to shift (0 <= bits < 32) + */ + void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) { + if (length > 0 && bits != 0) { + for(int64_t i=length-1; i > 0; --i) { + array[i] = (array[i] >> bits) | (array[i-1] << (32 - bits)); + } + array[0] >>= bits; + } + } + + /** + * Fix the signs of the result and remainder at the end of the division + * based on the signs of the dividend and divisor. + */ + void fixDivisionSigns(Int128 &result, Int128 &remainder, + bool dividendWasNegative, bool divisorWasNegative) { + if (dividendWasNegative != divisorWasNegative) { + result.negate(); + } + if (dividendWasNegative) { + remainder.negate(); + } + } + + /** + * Build a Int128 from a list of ints. + */ + void buildFromArray(Int128& value, uint32_t* array, int64_t length) { + switch (length) { + case 0: + value = 0; + break; + case 1: + value = array[0]; + break; + case 2: + value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]); + break; + case 3: + value = Int128(array[0], + (static_cast<uint64_t>(array[1]) << 32) + array[2]); + break; + case 4: + value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1], + (static_cast<uint64_t>(array[2]) << 32) + array[3]); + break; + case 5: + if (array[0] != 0) { + throw std::logic_error("Can't build Int128 with 5 ints."); + } + value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2], + (static_cast<uint64_t>(array[3]) << 32) + array[4]); + break; + default: + throw std::logic_error("Unsupported length for building Int128"); + } + } + + /** + * Do a division where the divisor fits into a single 32 bit value. + */ + Int128 singleDivide(uint32_t* dividend, int64_t dividendLength, + uint32_t divisor, Int128& remainder, + bool dividendWasNegative, bool divisorWasNegative) { + uint64_t r = 0; + uint32_t resultArray[5]; + for(int64_t j=0; j < dividendLength; j++) { + r <<= 32; + r += dividend[j]; + resultArray[j] = static_cast<uint32_t>(r / divisor); + r %= divisor; + } + Int128 result; + buildFromArray(result, resultArray, dividendLength); + remainder = static_cast<int64_t>(r); + fixDivisionSigns(result, remainder, dividendWasNegative, + divisorWasNegative); + return result; + } + + Int128 Int128::divide(const Int128 &divisor, Int128 &remainder) const { + // Split the dividend and divisor into integer pieces so that we can + // work on them. 
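      // --- editorial sketch, not part of the original diff ---
      // Illustration (with a hypothetical value) of the split performed by
      // fillInArray() above, which writes the absolute value as base-2**32
      // digits, most significant first: a positive value with
      // highbits == 0 and lowbits == 0x0000000500000007 becomes the digits
      // {0x5, 0x7} with length 2, while a negative value is first negated
      // and reported through wasNegative. The dividend additionally gets a
      // leading zero digit in dividendArray[0], so the two-digit window
      // used to guess each quotient digit is well defined from the very
      // first iteration of the loop below.
      // --- end editorial sketch ---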
+ uint32_t dividendArray[5]; + uint32_t divisorArray[4]; + bool dividendWasNegative; + bool divisorWasNegative; + // leave an extra zero before the dividend + dividendArray[0] = 0; + int64_t dividendLength = fillInArray(dividendArray + 1, dividendWasNegative)+1; + int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative); + + // Handle some of the easy cases. + if (dividendLength <= divisorLength) { + remainder = *this; + return 0; + } else if (divisorLength == 0) { + throw std::range_error("Division by 0 in Int128"); + } else if (divisorLength == 1) { + return singleDivide(dividendArray, dividendLength, divisorArray[0], + remainder, dividendWasNegative, divisorWasNegative); + } + + int64_t resultLength = dividendLength - divisorLength; + uint32_t resultArray[4]; + + // Normalize by shifting both by a multiple of 2 so that + // the digit guessing is better. The requirement is that + // divisorArray[0] is greater than 2**31. + int64_t normalizeBits = 32 - fls(divisorArray[0]); + shiftArrayLeft(divisorArray, divisorLength, normalizeBits); + shiftArrayLeft(dividendArray, dividendLength, normalizeBits); + + // compute each digit in the result + for(int64_t j=0; j < resultLength; ++j) { + // Guess the next digit. At worst it is two too large + uint32_t guess = UINT32_MAX; + uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << 32 | + dividendArray[j+1]; + if (dividendArray[j] != divisorArray[0]) { + guess = static_cast<uint32_t>(highDividend / divisorArray[0]); + } + + // catch all of the cases where guess is two too large and most of the + // cases where it is one too large + uint32_t rhat = + static_cast<uint32_t>(highDividend - guess * + static_cast<uint64_t>(divisorArray[0])); + while (static_cast<uint64_t>(divisorArray[1]) * guess > + (static_cast<uint64_t>(rhat) << 32) + dividendArray[j+2]) { + guess -= 1; + rhat += divisorArray[0]; + if (static_cast<uint64_t>(rhat) < divisorArray[0]) { + break; + } + } + + // subtract off the guess * divisor from the dividend + uint64_t mult = 0; + for(int64_t i=divisorLength-1; i >= 0; --i) { + mult += static_cast<uint64_t>(guess) * divisorArray[i]; + uint32_t prev = dividendArray[j+i+1]; + dividendArray[j+i+1] -= static_cast<uint32_t>(mult); + mult >>= 32; + if (dividendArray[j+i+1] > prev) { + mult += 1; + } + } + uint32_t prev = dividendArray[j]; + dividendArray[j] -= static_cast<uint32_t>(mult); + + // if guess was too big, we add back divisor + if (dividendArray[j] > prev) { + guess -= 1; + uint32_t carry = 0; + for(int64_t i=divisorLength-1; i >= 0; --i) { + uint64_t sum = static_cast<uint64_t>(divisorArray[i]) + + dividendArray[j+i+1] + carry; + dividendArray[j+i+1] = static_cast<uint32_t>(sum); + carry = static_cast<uint32_t>(sum >> 32); + } + dividendArray[j] += carry; + } + + resultArray[j] = guess; + } + + // denormalize the remainder + shiftArrayRight(dividendArray, dividendLength, normalizeBits); + + // return result and remainder + Int128 result; + buildFromArray(result, resultArray, resultLength); + buildFromArray(remainder, dividendArray, dividendLength); + fixDivisionSigns(result, remainder, + dividendWasNegative, divisorWasNegative); + return result; + } + + std::string Int128::toString() const { + // 10**18 - the largest power of 10 less than 63 bits + const Int128 tenTo18(0xde0b6b3a7640000); + // 10**36 + const Int128 tenTo36(0xc097ce7bc90715, 0xb34b9f1000000000); + Int128 remainder; + std::stringstream buf; + bool needFill = false; + + // get anything above 10**36 and print it + Int128 top = 
divide(tenTo36, remainder); + if (top != 0) { + buf << top.toLong(); + remainder.abs(); + needFill = true; + } + + // now get anything above 10**18 and print it + Int128 tail; + top = remainder.divide(tenTo18, tail); + if (needFill || top != 0) { + if (needFill) { + buf << std::setw(18) << std::setfill('0'); + } else { + needFill = true; + tail.abs(); + } + buf << top.toLong(); + } + + // finally print the tail, which is less than 10**18 + if (needFill) { + buf << std::setw(18) << std::setfill('0'); + } + buf << tail.toLong(); + return buf.str(); + } + + std::string Int128::toDecimalString(int32_t scale) const { + std::string str = toString(); + if (scale == 0) { + return str; + } else if (*this < 0) { + int32_t len = static_cast<int32_t>(str.length()); + if (len - 1 > scale) { + return str.substr(0, static_cast<size_t>(len - scale)) + "." + + str.substr(static_cast<size_t>(len - scale), + static_cast<size_t>(scale)); + } else if (len - 1 == scale) { + return "-0." + str.substr(1, std::string::npos); + } else { + std::string result = "-0."; + for(int32_t i=0; i < scale - len + 1; ++i) { + result += "0"; + } + return result + str.substr(1, std::string::npos); + } + } else { + int32_t len = static_cast<int32_t>(str.length()); + if (len > scale) { + return str.substr(0, static_cast<size_t>(len - scale)) + "." + + str.substr(static_cast<size_t>(len - scale), + static_cast<size_t>(scale)); + } else if (len == scale) { + return "0." + str; + } else { + std::string result = "0."; + for(int32_t i=0; i < scale - len; ++i) { + result += "0"; + } + return result + str; + } + } + } + + std::string Int128::toHexString() const { + std::stringstream buf; + buf << std::hex << "0x" + << std::setw(16) << std::setfill('0') << highbits + << std::setw(16) << std::setfill('0') << lowbits; + return buf.str(); + } + + const static int32_t MAX_PRECISION_64 = 18; + const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] = + {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000}; + + Int128 scaleUpInt128ByPowerOfTen(Int128 value, + int32_t power, + bool &overflow) { + overflow = false; + Int128 remainder; + + while (power > 0) { + int32_t step = std::min(power, MAX_PRECISION_64); + if (value > 0 && Int128::maximumValue().divide(POWERS_OF_TEN[step], remainder) < value) { + overflow = true; + return Int128::maximumValue(); + } else if (value < 0 && Int128::minimumValue().divide(POWERS_OF_TEN[step], remainder) > value) { + overflow = true; + return Int128::minimumValue(); + } + + value *= POWERS_OF_TEN[step]; + power -= step; + } + + return value; + } + + Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power) { + Int128 remainder; + while (power > 0) { + int32_t step = std::min(std::abs(power), MAX_PRECISION_64); + value = value.divide(POWERS_OF_TEN[step], remainder); + power -= step; + } + return value; + } + +} diff --git a/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc b/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc index 7bf91dee13..d1ba183aeb 100644 --- a/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc +++ b/contrib/libs/apache/orc/c++/src/LzoDecompressor.cc @@ -1,391 +1,391 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "Compression.hh" -#include "orc/Exceptions.hh" - -#include <string> - -namespace orc { - - static const int32_t DEC_32_TABLE[] = {4, 1, 2, 1, 4, 4, 4, 4}; - static const int32_t DEC_64_TABLE[] = {0, 0, 0, -1, 0, 1, 2, 3}; - - static const int32_t SIZE_OF_SHORT = 2; - static const int32_t SIZE_OF_INT = 4; - static const int32_t SIZE_OF_LONG = 8; - - static std::string toHex(uint64_t val) { - std::ostringstream out; - out << "0x" << std::hex << val; - return out.str(); - } - - static std::string toString(int64_t val) { - std::ostringstream out; - out << val; - return out.str(); - } - - class MalformedInputException: public ParseError { - public: - MalformedInputException(int64_t off - ) :ParseError("MalformedInputException at " + - toString(off)) { - } - - MalformedInputException(int64_t off, const std::string& msg - ): ParseError("MalformedInputException " + msg + - " at " + toString(off)) { - } - - MalformedInputException(const MalformedInputException& other - ): ParseError(other.what()) { - } - - virtual ~MalformedInputException() noexcept; - }; - - MalformedInputException::~MalformedInputException() noexcept { - // PASS - } - - uint64_t lzoDecompress(const char *inputAddress, - const char *inputLimit, - char *outputAddress, - char *outputLimit) { - // nothing compresses to nothing - if (inputAddress == inputLimit) { - return 0; - } - - // maximum offset in buffers to which it's safe to write long-at-a-time - char * const fastOutputLimit = outputLimit - SIZE_OF_LONG; - - // LZO can concat two blocks together so, decode until the input data is - // consumed - const char *input = inputAddress; - char *output = outputAddress; - while (input < inputLimit) { - // - // Note: For safety some of the code below may stop decoding early or - // skip decoding, because input is not available. This makes the code - // safe, and since LZO requires an explicit "stop" command, the decoder - // will still throw a exception. - // - - bool firstCommand = true; - uint32_t lastLiteralLength = 0; - while (true) { - if (input >= inputLimit) { - throw MalformedInputException(input - inputAddress); - } - uint32_t command = *(input++) & 0xFF; - if (command == 0x11) { - break; - } - - // Commands are described using a bit pattern notation: - // 0: bit is not set - // 1: bit is set - // L: part of literal length - // P: part of match offset position - // M: part of match length - // ?: see documentation in command decoder - - int32_t matchLength; - int32_t matchOffset; - uint32_t literalLength; - if ((command & 0xf0) == 0) { - if (lastLiteralLength == 0) { - // 0b0000_LLLL (0bLLLL_LLLL)* - - // copy length :: fixed - // 0 - matchOffset = 0; - - // copy offset :: fixed - // 0 - matchLength = 0; - - // literal length - 3 :: variable bits :: valid range [4..] 
- // 3 + variableLength(command bits [0..3], 4) - literalLength = command & 0xf; - if (literalLength == 0) { - literalLength = 0xf; - - uint32_t nextByte = 0; - while (input < inputLimit && - (nextByte = *(input++) & 0xFF) == 0) { - literalLength += 0xff; - } - literalLength += nextByte; - } - literalLength += 3; - } else if (lastLiteralLength <= 3) { - // 0b0000_PPLL 0bPPPP_PPPP - - // copy length: fixed - // 3 - matchLength = 3; - - // copy offset :: 12 bits :: valid range [2048..3071] - // [0..1] from command [2..3] - // [2..9] from trailer [0..7] - // [10] unset - // [11] set - if (input >= inputLimit) { - throw MalformedInputException(input - inputAddress); - } - matchOffset = (command & 0xc) >> 2; - matchOffset |= (*(input++) & 0xFF) << 2; - matchOffset |= 0x800; - - // literal length :: 2 bits :: valid range [0..3] - // [0..1] from command [0..1] - literalLength = (command & 0x3); - } else { - // 0b0000_PPLL 0bPPPP_PPPP - - // copy length :: fixed - // 2 - matchLength = 2; - - // copy offset :: 10 bits :: valid range [0..1023] - // [0..1] from command [2..3] - // [2..9] from trailer [0..7] - if (input >= inputLimit) { - throw MalformedInputException(input - inputAddress); - } - matchOffset = (command & 0xc) >> 2; - matchOffset |= (*(input++) & 0xFF) << 2; - - // literal length :: 2 bits :: valid range [0..3] - // [0..1] from command [0..1] - literalLength = (command & 0x3); - } - } else if (firstCommand) { - // first command has special handling when high nibble is set - matchLength = 0; - matchOffset = 0; - literalLength = command - 17; - } else if ((command & 0xf0) == 0x10) { - // 0b0001_?MMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL - - // copy length - 2 :: variable bits :: valid range [3..] - // 2 + variableLength(command bits [0..2], 3) - matchLength = command & 0x7; - if (matchLength == 0) { - matchLength = 0x7; - - int32_t nextByte = 0; - while (input < inputLimit && - (nextByte = *(input++) & 0xFF) == 0) { - matchLength += 0xff; - } - matchLength += nextByte; - } - matchLength += 2; - - // read trailer - if (input + SIZE_OF_SHORT > inputLimit) { - throw MalformedInputException(input - inputAddress); - } - uint32_t trailer = *reinterpret_cast<const uint16_t*>(input) & 0xFFFF; - input += SIZE_OF_SHORT; - - // copy offset :: 16 bits :: valid range [32767..49151] - // [0..13] from trailer [2..15] - // [14] if command bit [3] unset - // [15] if command bit [3] set - matchOffset = trailer >> 2; - if ((command & 0x8) == 0) { - matchOffset |= 0x4000; - } else { - matchOffset |= 0x8000; - } - matchOffset--; - - // literal length :: 2 bits :: valid range [0..3] - // [0..1] from trailer [0..1] - literalLength = trailer & 0x3; - } else if ((command & 0xe0) == 0x20) { - // 0b001M_MMMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL - - // copy length - 2 :: variable bits :: valid range [3..] 
- // 2 + variableLength(command bits [0..4], 5) - matchLength = command & 0x1f; - if (matchLength == 0) { - matchLength = 0x1f; - - int nextByte = 0; - while (input < inputLimit && - (nextByte = *(input++) & 0xFF) == 0) { - matchLength += 0xff; - } - matchLength += nextByte; - } - matchLength += 2; - - // read trailer - if (input + SIZE_OF_SHORT > inputLimit) { - throw MalformedInputException(input - inputAddress); - } - int32_t trailer = *reinterpret_cast<const int16_t*>(input) & 0xFFFF; - input += SIZE_OF_SHORT; - - // copy offset :: 14 bits :: valid range [0..16383] - // [0..13] from trailer [2..15] - matchOffset = trailer >> 2; - - // literal length :: 2 bits :: valid range [0..3] - // [0..1] from trailer [0..1] - literalLength = trailer & 0x3; - } else if ((command & 0xc0) != 0) { - // 0bMMMP_PPLL 0bPPPP_PPPP - - // copy length - 1 :: 3 bits :: valid range [1..8] - // [0..2] from command [5..7] - // add 1 - matchLength = (command & 0xe0) >> 5; - matchLength += 1; - - // copy offset :: 11 bits :: valid range [0..4095] - // [0..2] from command [2..4] - // [3..10] from trailer [0..7] - if (input >= inputLimit) { - throw MalformedInputException(input - inputAddress); - } - matchOffset = (command & 0x1c) >> 2; - matchOffset |= (*(input++) & 0xFF) << 3; - - // literal length :: 2 bits :: valid range [0..3] - // [0..1] from command [0..1] - literalLength = (command & 0x3); - } else { - throw MalformedInputException(input - inputAddress - 1, - "Invalid LZO command " + - toHex(command)); - } - firstCommand = false; - - // copy match - if (matchLength != 0) { - // lzo encodes match offset minus one - matchOffset++; - - char *matchAddress = output - matchOffset; - if (matchAddress < outputAddress || - output + matchLength > outputLimit) { - throw MalformedInputException(input - inputAddress); - } - char *matchOutputLimit = output + matchLength; - - if (output > fastOutputLimit) { - // slow match copy - while (output < matchOutputLimit) { - *(output++) = *(matchAddress++); - } - } else { - // copy repeated sequence - if (matchOffset < SIZE_OF_LONG) { - // 8 bytes apart so that we can copy long-at-a-time below - int32_t increment32 = DEC_32_TABLE[matchOffset]; - int32_t decrement64 = DEC_64_TABLE[matchOffset]; - - output[0] = *matchAddress; - output[1] = *(matchAddress + 1); - output[2] = *(matchAddress + 2); - output[3] = *(matchAddress + 3); - output += SIZE_OF_INT; - matchAddress += increment32; - - *reinterpret_cast<int32_t*>(output) = - *reinterpret_cast<int32_t*>(matchAddress); - output += SIZE_OF_INT; - matchAddress -= decrement64; - } else { - *reinterpret_cast<int64_t*>(output) = - *reinterpret_cast<int64_t*>(matchAddress); - matchAddress += SIZE_OF_LONG; - output += SIZE_OF_LONG; - } - - if (matchOutputLimit >= fastOutputLimit) { - if (matchOutputLimit > outputLimit) { - throw MalformedInputException(input - inputAddress); - } - - while (output < fastOutputLimit) { - *reinterpret_cast<int64_t*>(output) = - *reinterpret_cast<int64_t*>(matchAddress); - matchAddress += SIZE_OF_LONG; - output += SIZE_OF_LONG; - } - - while (output < matchOutputLimit) { - *(output++) = *(matchAddress++); - } - } else { - while (output < matchOutputLimit) { - *reinterpret_cast<int64_t*>(output) = - *reinterpret_cast<int64_t*>(matchAddress); - matchAddress += SIZE_OF_LONG; - output += SIZE_OF_LONG; - } - } - } - output = matchOutputLimit; // correction in case we over-copied - } - - // copy literal - char *literalOutputLimit = output + literalLength; - if (literalOutputLimit > fastOutputLimit || - input + 
literalLength > inputLimit - SIZE_OF_LONG) { - if (literalOutputLimit > outputLimit) { - throw MalformedInputException(input - inputAddress); - } - - // slow, precise copy - memcpy(output, input, literalLength); - input += literalLength; - output += literalLength; - } else { - // fast copy. We may over-copy but there's enough room in input - // and output to not overrun them - do { - *reinterpret_cast<int64_t*>(output) = - *reinterpret_cast<const int64_t*>(input); - input += SIZE_OF_LONG; - output += SIZE_OF_LONG; - } while (output < literalOutputLimit); - // adjust index if we over-copied - input -= (output - literalOutputLimit); - output = literalOutputLimit; - } - lastLiteralLength = literalLength; - } - - if (input + SIZE_OF_SHORT > inputLimit && - *reinterpret_cast<const int16_t*>(input) != 0) { - throw MalformedInputException(input - inputAddress); - } - input += SIZE_OF_SHORT; - } - - return static_cast<uint64_t>(output - outputAddress); - } - -} +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "orc/Exceptions.hh" + +#include <string> + +namespace orc { + + static const int32_t DEC_32_TABLE[] = {4, 1, 2, 1, 4, 4, 4, 4}; + static const int32_t DEC_64_TABLE[] = {0, 0, 0, -1, 0, 1, 2, 3}; + + static const int32_t SIZE_OF_SHORT = 2; + static const int32_t SIZE_OF_INT = 4; + static const int32_t SIZE_OF_LONG = 8; + + static std::string toHex(uint64_t val) { + std::ostringstream out; + out << "0x" << std::hex << val; + return out.str(); + } + + static std::string toString(int64_t val) { + std::ostringstream out; + out << val; + return out.str(); + } + + class MalformedInputException: public ParseError { + public: + MalformedInputException(int64_t off + ) :ParseError("MalformedInputException at " + + toString(off)) { + } + + MalformedInputException(int64_t off, const std::string& msg + ): ParseError("MalformedInputException " + msg + + " at " + toString(off)) { + } + + MalformedInputException(const MalformedInputException& other + ): ParseError(other.what()) { + } + + virtual ~MalformedInputException() noexcept; + }; + + MalformedInputException::~MalformedInputException() noexcept { + // PASS + } + + uint64_t lzoDecompress(const char *inputAddress, + const char *inputLimit, + char *outputAddress, + char *outputLimit) { + // nothing compresses to nothing + if (inputAddress == inputLimit) { + return 0; + } + + // maximum offset in buffers to which it's safe to write long-at-a-time + char * const fastOutputLimit = outputLimit - SIZE_OF_LONG; + + // LZO can concat two blocks together so, decode until the input data is + // consumed + const char *input = inputAddress; + char *output = outputAddress; + while (input < inputLimit) { + // + // Note: For safety some of the code below may stop decoding early or + // skip decoding, because input is not available. This makes the code + // safe, and since LZO requires an explicit "stop" command, the decoder + // will still throw a exception. 
+ // + + bool firstCommand = true; + uint32_t lastLiteralLength = 0; + while (true) { + if (input >= inputLimit) { + throw MalformedInputException(input - inputAddress); + } + uint32_t command = *(input++) & 0xFF; + if (command == 0x11) { + break; + } + + // Commands are described using a bit pattern notation: + // 0: bit is not set + // 1: bit is set + // L: part of literal length + // P: part of match offset position + // M: part of match length + // ?: see documentation in command decoder + + int32_t matchLength; + int32_t matchOffset; + uint32_t literalLength; + if ((command & 0xf0) == 0) { + if (lastLiteralLength == 0) { + // 0b0000_LLLL (0bLLLL_LLLL)* + + // copy length :: fixed + // 0 + matchOffset = 0; + + // copy offset :: fixed + // 0 + matchLength = 0; + + // literal length - 3 :: variable bits :: valid range [4..] + // 3 + variableLength(command bits [0..3], 4) + literalLength = command & 0xf; + if (literalLength == 0) { + literalLength = 0xf; + + uint32_t nextByte = 0; + while (input < inputLimit && + (nextByte = *(input++) & 0xFF) == 0) { + literalLength += 0xff; + } + literalLength += nextByte; + } + literalLength += 3; + } else if (lastLiteralLength <= 3) { + // 0b0000_PPLL 0bPPPP_PPPP + + // copy length: fixed + // 3 + matchLength = 3; + + // copy offset :: 12 bits :: valid range [2048..3071] + // [0..1] from command [2..3] + // [2..9] from trailer [0..7] + // [10] unset + // [11] set + if (input >= inputLimit) { + throw MalformedInputException(input - inputAddress); + } + matchOffset = (command & 0xc) >> 2; + matchOffset |= (*(input++) & 0xFF) << 2; + matchOffset |= 0x800; + + // literal length :: 2 bits :: valid range [0..3] + // [0..1] from command [0..1] + literalLength = (command & 0x3); + } else { + // 0b0000_PPLL 0bPPPP_PPPP + + // copy length :: fixed + // 2 + matchLength = 2; + + // copy offset :: 10 bits :: valid range [0..1023] + // [0..1] from command [2..3] + // [2..9] from trailer [0..7] + if (input >= inputLimit) { + throw MalformedInputException(input - inputAddress); + } + matchOffset = (command & 0xc) >> 2; + matchOffset |= (*(input++) & 0xFF) << 2; + + // literal length :: 2 bits :: valid range [0..3] + // [0..1] from command [0..1] + literalLength = (command & 0x3); + } + } else if (firstCommand) { + // first command has special handling when high nibble is set + matchLength = 0; + matchOffset = 0; + literalLength = command - 17; + } else if ((command & 0xf0) == 0x10) { + // 0b0001_?MMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL + + // copy length - 2 :: variable bits :: valid range [3..] 
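// A minimal illustrative sketch of the "variableLength(...)" rule referenced in the
// comments above, assuming `input`/`inputLimit` mean the same as in lzoDecompress;
// the helper name is hypothetical. A field of N low command bits holds the value
// directly when non-zero; a zero field means the field maximum, plus 255 for every
// following 0x00 byte, plus the first non-zero byte.
static inline uint32_t decodeVariableLength(uint32_t fieldValue, uint32_t fieldMax,
                                            const char*& input, const char* inputLimit) {
  if (fieldValue != 0) {
    return fieldValue;                  // short form: value fits in the command bits
  }
  uint32_t length = fieldMax;           // e.g. 0x7 for a 3-bit field, 0x1f for 5 bits
  uint32_t nextByte = 0;
  while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
    length += 0xff;                     // each 0x00 continuation byte adds 255
  }
  return length + nextByte;             // terminated by the first non-zero byte
}
// Example: command bits 0b101 decode to 5, while bits 0b000 followed by the bytes
// 0x00 0x12 decode to 7 + 255 + 0x12 = 280; the caller then adds its fixed bias
// (+2 for match lengths, +3 for the long literal form).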
+ // 2 + variableLength(command bits [0..2], 3) + matchLength = command & 0x7; + if (matchLength == 0) { + matchLength = 0x7; + + int32_t nextByte = 0; + while (input < inputLimit && + (nextByte = *(input++) & 0xFF) == 0) { + matchLength += 0xff; + } + matchLength += nextByte; + } + matchLength += 2; + + // read trailer + if (input + SIZE_OF_SHORT > inputLimit) { + throw MalformedInputException(input - inputAddress); + } + uint32_t trailer = *reinterpret_cast<const uint16_t*>(input) & 0xFFFF; + input += SIZE_OF_SHORT; + + // copy offset :: 16 bits :: valid range [32767..49151] + // [0..13] from trailer [2..15] + // [14] if command bit [3] unset + // [15] if command bit [3] set + matchOffset = trailer >> 2; + if ((command & 0x8) == 0) { + matchOffset |= 0x4000; + } else { + matchOffset |= 0x8000; + } + matchOffset--; + + // literal length :: 2 bits :: valid range [0..3] + // [0..1] from trailer [0..1] + literalLength = trailer & 0x3; + } else if ((command & 0xe0) == 0x20) { + // 0b001M_MMMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL + + // copy length - 2 :: variable bits :: valid range [3..] + // 2 + variableLength(command bits [0..4], 5) + matchLength = command & 0x1f; + if (matchLength == 0) { + matchLength = 0x1f; + + int nextByte = 0; + while (input < inputLimit && + (nextByte = *(input++) & 0xFF) == 0) { + matchLength += 0xff; + } + matchLength += nextByte; + } + matchLength += 2; + + // read trailer + if (input + SIZE_OF_SHORT > inputLimit) { + throw MalformedInputException(input - inputAddress); + } + int32_t trailer = *reinterpret_cast<const int16_t*>(input) & 0xFFFF; + input += SIZE_OF_SHORT; + + // copy offset :: 14 bits :: valid range [0..16383] + // [0..13] from trailer [2..15] + matchOffset = trailer >> 2; + + // literal length :: 2 bits :: valid range [0..3] + // [0..1] from trailer [0..1] + literalLength = trailer & 0x3; + } else if ((command & 0xc0) != 0) { + // 0bMMMP_PPLL 0bPPPP_PPPP + + // copy length - 1 :: 3 bits :: valid range [1..8] + // [0..2] from command [5..7] + // add 1 + matchLength = (command & 0xe0) >> 5; + matchLength += 1; + + // copy offset :: 11 bits :: valid range [0..4095] + // [0..2] from command [2..4] + // [3..10] from trailer [0..7] + if (input >= inputLimit) { + throw MalformedInputException(input - inputAddress); + } + matchOffset = (command & 0x1c) >> 2; + matchOffset |= (*(input++) & 0xFF) << 3; + + // literal length :: 2 bits :: valid range [0..3] + // [0..1] from command [0..1] + literalLength = (command & 0x3); + } else { + throw MalformedInputException(input - inputAddress - 1, + "Invalid LZO command " + + toHex(command)); + } + firstCommand = false; + + // copy match + if (matchLength != 0) { + // lzo encodes match offset minus one + matchOffset++; + + char *matchAddress = output - matchOffset; + if (matchAddress < outputAddress || + output + matchLength > outputLimit) { + throw MalformedInputException(input - inputAddress); + } + char *matchOutputLimit = output + matchLength; + + if (output > fastOutputLimit) { + // slow match copy + while (output < matchOutputLimit) { + *(output++) = *(matchAddress++); + } + } else { + // copy repeated sequence + if (matchOffset < SIZE_OF_LONG) { + // 8 bytes apart so that we can copy long-at-a-time below + int32_t increment32 = DEC_32_TABLE[matchOffset]; + int32_t decrement64 = DEC_64_TABLE[matchOffset]; + + output[0] = *matchAddress; + output[1] = *(matchAddress + 1); + output[2] = *(matchAddress + 2); + output[3] = *(matchAddress + 3); + output += SIZE_OF_INT; + matchAddress += increment32; + + 
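// A minimal sketch of what the match copy below computes, ignoring the long-at-a-time
// optimization; copyMatchSlow is a hypothetical helper name. Every match is a byte-wise
// copy from `matchOffset` bytes back in the already-written output, which naturally
// repeats bytes when matchLength > matchOffset. The DEC_32_TABLE / DEC_64_TABLE
// indirection exists only so that offsets smaller than SIZE_OF_LONG can still be copied
// in 8-byte chunks without reading bytes that have not been written yet.
static inline void copyMatchSlow(char* output, int32_t matchOffset, int32_t matchLength) {
  const char* matchAddress = output - matchOffset;
  for (int32_t i = 0; i < matchLength; ++i) {
    output[i] = matchAddress[i];   // reads may overlap freshly written output bytes
  }
}
// e.g. with matchOffset == 1 and matchLength == 5 this replicates the previous byte
// five times, which is how LZO encodes runs.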
*reinterpret_cast<int32_t*>(output) = + *reinterpret_cast<int32_t*>(matchAddress); + output += SIZE_OF_INT; + matchAddress -= decrement64; + } else { + *reinterpret_cast<int64_t*>(output) = + *reinterpret_cast<int64_t*>(matchAddress); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + + if (matchOutputLimit >= fastOutputLimit) { + if (matchOutputLimit > outputLimit) { + throw MalformedInputException(input - inputAddress); + } + + while (output < fastOutputLimit) { + *reinterpret_cast<int64_t*>(output) = + *reinterpret_cast<int64_t*>(matchAddress); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + + while (output < matchOutputLimit) { + *(output++) = *(matchAddress++); + } + } else { + while (output < matchOutputLimit) { + *reinterpret_cast<int64_t*>(output) = + *reinterpret_cast<int64_t*>(matchAddress); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + } + } + output = matchOutputLimit; // correction in case we over-copied + } + + // copy literal + char *literalOutputLimit = output + literalLength; + if (literalOutputLimit > fastOutputLimit || + input + literalLength > inputLimit - SIZE_OF_LONG) { + if (literalOutputLimit > outputLimit) { + throw MalformedInputException(input - inputAddress); + } + + // slow, precise copy + memcpy(output, input, literalLength); + input += literalLength; + output += literalLength; + } else { + // fast copy. We may over-copy but there's enough room in input + // and output to not overrun them + do { + *reinterpret_cast<int64_t*>(output) = + *reinterpret_cast<const int64_t*>(input); + input += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } while (output < literalOutputLimit); + // adjust index if we over-copied + input -= (output - literalOutputLimit); + output = literalOutputLimit; + } + lastLiteralLength = literalLength; + } + + if (input + SIZE_OF_SHORT > inputLimit && + *reinterpret_cast<const int16_t*>(input) != 0) { + throw MalformedInputException(input - inputAddress); + } + input += SIZE_OF_SHORT; + } + + return static_cast<uint64_t>(output - outputAddress); + } + +} diff --git a/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh b/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh index 32d8085174..9de8537dd8 100644 --- a/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh +++ b/contrib/libs/apache/orc/c++/src/LzoDecompressor.hh @@ -1,42 +1,42 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_LZO_HH -#define ORC_LZO_HH - -#include "orc/OrcFile.hh" - -#include "Adaptor.hh" - -namespace orc { - - /** - * Decompress the bytes in to the output buffer. 
- * @param inputAddress the start of the input - * @param inputLimit one past the last byte of the input - * @param outputAddress the start of the output buffer - * @param outputLimit one past the last byte of the output buffer - * @result the number of bytes decompressed - */ - uint64_t lzoDecompress(const char *inputAddress, - const char *inputLimit, - char *outputAddress, - char *outputLimit); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_LZO_HH +#define ORC_LZO_HH + +#include "orc/OrcFile.hh" + +#include "Adaptor.hh" + +namespace orc { + + /** + * Decompress the bytes in to the output buffer. + * @param inputAddress the start of the input + * @param inputLimit one past the last byte of the input + * @param outputAddress the start of the output buffer + * @param outputLimit one past the last byte of the output buffer + * @result the number of bytes decompressed + */ + uint64_t lzoDecompress(const char *inputAddress, + const char *inputLimit, + char *outputAddress, + char *outputLimit); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/MemoryPool.cc b/contrib/libs/apache/orc/c++/src/MemoryPool.cc index 178e9cc316..ecfb295bae 100644 --- a/contrib/libs/apache/orc/c++/src/MemoryPool.cc +++ b/contrib/libs/apache/orc/c++/src/MemoryPool.cc @@ -1,244 +1,244 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
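// A minimal usage sketch for lzoDecompress as declared above, assuming the declarations
// from LzoDecompressor.hh and orc/Exceptions.hh are visible; `compressed` and
// `decompressedSize` are hypothetical names for one LZO block and its known uncompressed
// size. Both limits are "one past the last byte", and malformed input surfaces as
// orc::ParseError (the base of MalformedInputException).
#include <vector>
std::vector<char> decodeBlock(const std::vector<char>& compressed,
                              uint64_t decompressedSize) {
  std::vector<char> decompressed(decompressedSize);
  try {
    uint64_t written = orc::lzoDecompress(compressed.data(),
                                          compressed.data() + compressed.size(),
                                          decompressed.data(),
                                          decompressed.data() + decompressed.size());
    decompressed.resize(written);       // actual number of bytes produced
  } catch (const orc::ParseError& err) {
    // corrupt block: err.what() carries the offset reported by MalformedInputException
    throw;
  }
  return decompressed;
}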
- */ - -#include "orc/Int128.hh" -#include "orc/MemoryPool.hh" - -#include "Adaptor.hh" - -#include <cstdlib> -#include <iostream> -#include <string.h> - -namespace orc { - - MemoryPool::~MemoryPool() { - // PASS - } - - class MemoryPoolImpl: public MemoryPool { - public: - virtual ~MemoryPoolImpl() override; - - char* malloc(uint64_t size) override; - void free(char* p) override; - }; - - char* MemoryPoolImpl::malloc(uint64_t size) { - return static_cast<char*>(std::malloc(size)); - } - - void MemoryPoolImpl::free(char* p) { - std::free(p); - } - - MemoryPoolImpl::~MemoryPoolImpl() { - // PASS - } - - template <class T> - DataBuffer<T>::DataBuffer(MemoryPool& pool, - uint64_t newSize - ): memoryPool(pool), - buf(nullptr), - currentSize(0), - currentCapacity(0) { - resize(newSize); - } - - template <class T> - DataBuffer<T>::DataBuffer(DataBuffer<T>&& buffer - ) noexcept: - memoryPool(buffer.memoryPool), - buf(buffer.buf), - currentSize(buffer.currentSize), - currentCapacity(buffer.currentCapacity) { - buffer.buf = nullptr; - buffer.currentSize = 0; - buffer.currentCapacity = 0; - } - - template <class T> - DataBuffer<T>::~DataBuffer(){ - for(uint64_t i=currentSize; i > 0; --i) { - (buf + i - 1)->~T(); - } - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <class T> - void DataBuffer<T>::resize(uint64_t newSize) { - reserve(newSize); - if (currentSize > newSize) { - for(uint64_t i=currentSize; i > newSize; --i) { - (buf + i - 1)->~T(); - } - } else if (newSize > currentSize) { - for(uint64_t i=currentSize; i < newSize; ++i) { - new (buf + i) T(); - } - } - currentSize = newSize; - } - - template <class T> - void DataBuffer<T>::reserve(uint64_t newCapacity){ - if (newCapacity > currentCapacity || !buf) { - if (buf) { - T* buf_old = buf; - buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity)); - memcpy(buf, buf_old, sizeof(T) * currentSize); - memoryPool.free(reinterpret_cast<char*>(buf_old)); - } else { - buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity)); - } - currentCapacity = newCapacity; - } - } - - // Specializations for char - - template <> - DataBuffer<char>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<char>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, newSize - currentSize); - } - currentSize = newSize; - } - - // Specializations for char* - - template <> - DataBuffer<char*>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<char*>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(char*)); - } - currentSize = newSize; - } - - // Specializations for double - - template <> - DataBuffer<double>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<double>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(double)); - } - currentSize = newSize; - } - - // Specializations for int64_t - - template <> - DataBuffer<int64_t>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<int64_t>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - 
currentSize) * sizeof(int64_t)); - } - currentSize = newSize; - } - - // Specializations for uint64_t - - template <> - DataBuffer<uint64_t>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<uint64_t>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(uint64_t)); - } - currentSize = newSize; - } - - // Specializations for unsigned char - - template <> - DataBuffer<unsigned char>::~DataBuffer(){ - if (buf) { - memoryPool.free(reinterpret_cast<char*>(buf)); - } - } - - template <> - void DataBuffer<unsigned char>::resize(uint64_t newSize) { - reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, newSize - currentSize); - } - currentSize = newSize; - } - - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wweak-template-vtables" - #endif - - template class DataBuffer<char>; - template class DataBuffer<char*>; - template class DataBuffer<double>; - template class DataBuffer<Int128>; - template class DataBuffer<int64_t>; - template class DataBuffer<uint64_t>; - template class DataBuffer<unsigned char>; - - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wexit-time-destructors" - #endif - - MemoryPool* getDefaultPool() { - static MemoryPoolImpl internal; - return &internal; - } -} // namespace orc +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
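// A minimal sketch of a custom allocator plugged in through the MemoryPool interface
// shown above; CountingPool is a hypothetical example that only tracks outstanding
// allocations on top of the default pool.
#include "orc/MemoryPool.hh"
#include <atomic>
class CountingPool : public orc::MemoryPool {
public:
  char* malloc(uint64_t size) override {
    ++liveAllocations;
    return orc::getDefaultPool()->malloc(size);
  }
  void free(char* p) override {
    --liveAllocations;
    orc::getDefaultPool()->free(p);
  }
  std::atomic<int64_t> liveAllocations{0};
};
// A reader accepts such a pool through ReaderOptions::setMemoryPool(pool), and every
// DataBuffer constructed with it routes its (re)allocations through these two overrides.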
+ */ + +#include "orc/Int128.hh" +#include "orc/MemoryPool.hh" + +#include "Adaptor.hh" + +#include <cstdlib> +#include <iostream> +#include <string.h> + +namespace orc { + + MemoryPool::~MemoryPool() { + // PASS + } + + class MemoryPoolImpl: public MemoryPool { + public: + virtual ~MemoryPoolImpl() override; + + char* malloc(uint64_t size) override; + void free(char* p) override; + }; + + char* MemoryPoolImpl::malloc(uint64_t size) { + return static_cast<char*>(std::malloc(size)); + } + + void MemoryPoolImpl::free(char* p) { + std::free(p); + } + + MemoryPoolImpl::~MemoryPoolImpl() { + // PASS + } + + template <class T> + DataBuffer<T>::DataBuffer(MemoryPool& pool, + uint64_t newSize + ): memoryPool(pool), + buf(nullptr), + currentSize(0), + currentCapacity(0) { + resize(newSize); + } + + template <class T> + DataBuffer<T>::DataBuffer(DataBuffer<T>&& buffer + ) noexcept: + memoryPool(buffer.memoryPool), + buf(buffer.buf), + currentSize(buffer.currentSize), + currentCapacity(buffer.currentCapacity) { + buffer.buf = nullptr; + buffer.currentSize = 0; + buffer.currentCapacity = 0; + } + + template <class T> + DataBuffer<T>::~DataBuffer(){ + for(uint64_t i=currentSize; i > 0; --i) { + (buf + i - 1)->~T(); + } + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <class T> + void DataBuffer<T>::resize(uint64_t newSize) { + reserve(newSize); + if (currentSize > newSize) { + for(uint64_t i=currentSize; i > newSize; --i) { + (buf + i - 1)->~T(); + } + } else if (newSize > currentSize) { + for(uint64_t i=currentSize; i < newSize; ++i) { + new (buf + i) T(); + } + } + currentSize = newSize; + } + + template <class T> + void DataBuffer<T>::reserve(uint64_t newCapacity){ + if (newCapacity > currentCapacity || !buf) { + if (buf) { + T* buf_old = buf; + buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity)); + memcpy(buf, buf_old, sizeof(T) * currentSize); + memoryPool.free(reinterpret_cast<char*>(buf_old)); + } else { + buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity)); + } + currentCapacity = newCapacity; + } + } + + // Specializations for char + + template <> + DataBuffer<char>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<char>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, newSize - currentSize); + } + currentSize = newSize; + } + + // Specializations for char* + + template <> + DataBuffer<char*>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<char*>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(char*)); + } + currentSize = newSize; + } + + // Specializations for double + + template <> + DataBuffer<double>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<double>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(double)); + } + currentSize = newSize; + } + + // Specializations for int64_t + + template <> + DataBuffer<int64_t>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<int64_t>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, (newSize - 
currentSize) * sizeof(int64_t)); + } + currentSize = newSize; + } + + // Specializations for uint64_t + + template <> + DataBuffer<uint64_t>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<uint64_t>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(uint64_t)); + } + currentSize = newSize; + } + + // Specializations for unsigned char + + template <> + DataBuffer<unsigned char>::~DataBuffer(){ + if (buf) { + memoryPool.free(reinterpret_cast<char*>(buf)); + } + } + + template <> + void DataBuffer<unsigned char>::resize(uint64_t newSize) { + reserve(newSize); + if (newSize > currentSize) { + memset(buf + currentSize, 0, newSize - currentSize); + } + currentSize = newSize; + } + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wweak-template-vtables" + #endif + + template class DataBuffer<char>; + template class DataBuffer<char*>; + template class DataBuffer<double>; + template class DataBuffer<Int128>; + template class DataBuffer<int64_t>; + template class DataBuffer<uint64_t>; + template class DataBuffer<unsigned char>; + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wexit-time-destructors" + #endif + + MemoryPool* getDefaultPool() { + static MemoryPoolImpl internal; + return &internal; + } +} // namespace orc diff --git a/contrib/libs/apache/orc/c++/src/Murmur3.cc b/contrib/libs/apache/orc/c++/src/Murmur3.cc index 63cf797a04..b45bd6d492 100644 --- a/contrib/libs/apache/orc/c++/src/Murmur3.cc +++ b/contrib/libs/apache/orc/c++/src/Murmur3.cc @@ -1,98 +1,98 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
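// A minimal usage sketch for the DataBuffer specializations above. The numeric and char
// specializations zero-fill on growth instead of running per-element constructors, which
// keeps resize() cheap for them; data() is assumed to be the raw-pointer accessor
// declared in orc/MemoryPool.hh.
void dataBufferExample() {
  orc::MemoryPool* pool = orc::getDefaultPool();
  orc::DataBuffer<int64_t> values(*pool, 8);    // 8 zero-initialized elements
  values.resize(16);                            // new tail elements are zeroed via memset
  values.data()[0] = 42;
  values.resize(4);   // shrinking only adjusts the logical size; the allocation is kept
}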
- */ - -#include "Adaptor.hh" -#include "Murmur3.hh" - -#define ROTL64(x, r) ((x << r) | (x >> (64 - r))) - -namespace orc { - - inline uint64_t rotl64 ( uint64_t x, int8_t r ) { - return (x << r) | (x >> (64 - r)); - } - - inline uint64_t Murmur3::fmix64(uint64_t value) { - value ^= (value >> 33); - value *= 0xff51afd7ed558ccdL; - value ^= (value >> 33); - value *= 0xc4ceb9fe1a85ec53L; - value ^= (value >> 33); - return value; - } - - uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len) { - return hash64(data, len, DEFAULT_SEED); - } - - DIAGNOSTIC_PUSH - -#if defined(__clang__) - DIAGNOSTIC_IGNORE("-Wimplicit-fallthrough") -#endif - - uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len, uint32_t seed) { - uint64_t h = seed; - uint32_t blocks = len >> 3; - - const uint64_t* src = reinterpret_cast<const uint64_t*>(data); - uint64_t c1 = 0x87c37b91114253d5L; - uint64_t c2 = 0x4cf5ad432745937fL; - for (uint32_t i = 0; i < blocks; i++) { - uint64_t k = src[i]; - k *= c1; - k = ROTL64(k, 31); - k *= c2; - - h ^= k; - h = ROTL64(h, 27); - h = h * 5 + 0x52dce729; - } - - uint64_t k = 0; - uint32_t idx = blocks << 3; - switch (len - idx) { - case 7: - k ^= static_cast<uint64_t>(data[idx + 6]) << 48; - case 6: - k ^= static_cast<uint64_t>(data[idx + 5]) << 40; - case 5: - k ^= static_cast<uint64_t>(data[idx + 4]) << 32; - case 4: - k ^= static_cast<uint64_t>(data[idx + 3]) << 24; - case 3: - k ^= static_cast<uint64_t>(data[idx + 2]) << 16; - case 2: - k ^= static_cast<uint64_t>(data[idx + 1]) << 8; - case 1: - k ^= static_cast<uint64_t>(data[idx + 0]); - - k *= c1; - k = ROTL64(k, 31); - k *= c2; - h ^= k; - } - - h ^= len; - h = fmix64(h); - return h; - } - - DIAGNOSTIC_POP - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "Murmur3.hh" + +#define ROTL64(x, r) ((x << r) | (x >> (64 - r))) + +namespace orc { + + inline uint64_t rotl64 ( uint64_t x, int8_t r ) { + return (x << r) | (x >> (64 - r)); + } + + inline uint64_t Murmur3::fmix64(uint64_t value) { + value ^= (value >> 33); + value *= 0xff51afd7ed558ccdL; + value ^= (value >> 33); + value *= 0xc4ceb9fe1a85ec53L; + value ^= (value >> 33); + return value; + } + + uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len) { + return hash64(data, len, DEFAULT_SEED); + } + + DIAGNOSTIC_PUSH + +#if defined(__clang__) + DIAGNOSTIC_IGNORE("-Wimplicit-fallthrough") +#endif + + uint64_t Murmur3::hash64(const uint8_t *data, uint32_t len, uint32_t seed) { + uint64_t h = seed; + uint32_t blocks = len >> 3; + + const uint64_t* src = reinterpret_cast<const uint64_t*>(data); + uint64_t c1 = 0x87c37b91114253d5L; + uint64_t c2 = 0x4cf5ad432745937fL; + for (uint32_t i = 0; i < blocks; i++) { + uint64_t k = src[i]; + k *= c1; + k = ROTL64(k, 31); + k *= c2; + + h ^= k; + h = ROTL64(h, 27); + h = h * 5 + 0x52dce729; + } + + uint64_t k = 0; + uint32_t idx = blocks << 3; + switch (len - idx) { + case 7: + k ^= static_cast<uint64_t>(data[idx + 6]) << 48; + case 6: + k ^= static_cast<uint64_t>(data[idx + 5]) << 40; + case 5: + k ^= static_cast<uint64_t>(data[idx + 4]) << 32; + case 4: + k ^= static_cast<uint64_t>(data[idx + 3]) << 24; + case 3: + k ^= static_cast<uint64_t>(data[idx + 2]) << 16; + case 2: + k ^= static_cast<uint64_t>(data[idx + 1]) << 8; + case 1: + k ^= static_cast<uint64_t>(data[idx + 0]); + + k *= c1; + k = ROTL64(k, 31); + k *= c2; + h ^= k; + } + + h ^= len; + h = fmix64(h); + return h; + } + + DIAGNOSTIC_POP + +} diff --git a/contrib/libs/apache/orc/c++/src/Murmur3.hh b/contrib/libs/apache/orc/c++/src/Murmur3.hh index 9cf1de138f..02391811b0 100644 --- a/contrib/libs/apache/orc/c++/src/Murmur3.hh +++ b/contrib/libs/apache/orc/c++/src/Murmur3.hh @@ -1,40 +1,40 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_MURMUR3_HH -#define ORC_MURMUR3_HH - -#include "orc/orc-config.hh" - -namespace orc { - - class Murmur3 { - public: - static const uint32_t DEFAULT_SEED = 104729; - static const uint64_t NULL_HASHCODE = 2862933555777941757LL; - - static uint64_t hash64(const uint8_t *data, uint32_t len); - - private: - static uint64_t fmix64(uint64_t value); - static uint64_t hash64(const uint8_t* data, uint32_t len, uint32_t seed); - }; - -} - -#endif //ORC_MURMUR3_HH +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_MURMUR3_HH +#define ORC_MURMUR3_HH + +#include "orc/orc-config.hh" + +namespace orc { + + class Murmur3 { + public: + static const uint32_t DEFAULT_SEED = 104729; + static const uint64_t NULL_HASHCODE = 2862933555777941757LL; + + static uint64_t hash64(const uint8_t *data, uint32_t len); + + private: + static uint64_t fmix64(uint64_t value); + static uint64_t hash64(const uint8_t* data, uint32_t len, uint32_t seed); + }; + +} + +#endif //ORC_MURMUR3_HH diff --git a/contrib/libs/apache/orc/c++/src/Options.hh b/contrib/libs/apache/orc/c++/src/Options.hh index ee9982cdc2..795e166138 100644 --- a/contrib/libs/apache/orc/c++/src/Options.hh +++ b/contrib/libs/apache/orc/c++/src/Options.hh @@ -1,258 +1,258 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
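// A minimal usage sketch for the Murmur3 helper above: the public entry point hashes a
// raw byte range with DEFAULT_SEED, while the seeded overload is private, so callers
// cannot pick their own seed. hashString is a hypothetical wrapper.
#include <string>
uint64_t hashString(const std::string& value) {
  return orc::Murmur3::hash64(reinterpret_cast<const uint8_t*>(value.data()),
                              static_cast<uint32_t>(value.size()));
}
// Identical byte sequences always yield identical 64-bit hashes, regardless of how the
// bytes were produced.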
- */ - -#ifndef ORC_OPTIONS_HH -#define ORC_OPTIONS_HH - -#include "orc/Int128.hh" -#include "orc/OrcFile.hh" -#include "orc/Reader.hh" - -#include <limits> - -namespace orc { - - enum ColumnSelection { - ColumnSelection_NONE = 0, - ColumnSelection_NAMES = 1, - ColumnSelection_FIELD_IDS = 2, - ColumnSelection_TYPE_IDS = 3, - }; - -/** - * ReaderOptions Implementation - */ - struct ReaderOptionsPrivate { - uint64_t tailLocation; - std::ostream* errorStream; - MemoryPool* memoryPool; - std::string serializedTail; - - ReaderOptionsPrivate() { - tailLocation = std::numeric_limits<uint64_t>::max(); - errorStream = &std::cerr; - memoryPool = getDefaultPool(); - } - }; - - ReaderOptions::ReaderOptions(): - privateBits(std::unique_ptr<ReaderOptionsPrivate> - (new ReaderOptionsPrivate())) { - // PASS - } - - ReaderOptions::ReaderOptions(const ReaderOptions& rhs): - privateBits(std::unique_ptr<ReaderOptionsPrivate> - (new ReaderOptionsPrivate(*(rhs.privateBits.get())))) { - // PASS - } - - ReaderOptions::ReaderOptions(ReaderOptions& rhs) { - // swap privateBits with rhs - ReaderOptionsPrivate* l = privateBits.release(); - privateBits.reset(rhs.privateBits.release()); - rhs.privateBits.reset(l); - } - - ReaderOptions& ReaderOptions::operator=(const ReaderOptions& rhs) { - if (this != &rhs) { - privateBits.reset(new ReaderOptionsPrivate(*(rhs.privateBits.get()))); - } - return *this; - } - - ReaderOptions::~ReaderOptions() { - // PASS - } - - ReaderOptions& ReaderOptions::setMemoryPool(MemoryPool& pool) { - privateBits->memoryPool = &pool; - return *this; - } - - MemoryPool* ReaderOptions::getMemoryPool() const{ - return privateBits->memoryPool; - } - - ReaderOptions& ReaderOptions::setTailLocation(uint64_t offset) { - privateBits->tailLocation = offset; - return *this; - } - - uint64_t ReaderOptions::getTailLocation() const { - return privateBits->tailLocation; - } - - ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value - ) { - privateBits->serializedTail = value; - return *this; - } - - std::string ReaderOptions::getSerializedFileTail() const { - return privateBits->serializedTail; - } - - ReaderOptions& ReaderOptions::setErrorStream(std::ostream& stream) { - privateBits->errorStream = &stream; - return *this; - } - - std::ostream* ReaderOptions::getErrorStream() const { - return privateBits->errorStream; - } - -/** - * RowReaderOptions Implementation - */ - - struct RowReaderOptionsPrivate { - ColumnSelection selection; - std::list<uint64_t> includedColumnIndexes; - std::list<std::string> includedColumnNames; - uint64_t dataStart; - uint64_t dataLength; - bool throwOnHive11DecimalOverflow; - int32_t forcedScaleOnHive11Decimal; - bool enableLazyDecoding; - - RowReaderOptionsPrivate() { - selection = ColumnSelection_NONE; - dataStart = 0; - dataLength = std::numeric_limits<uint64_t>::max(); - throwOnHive11DecimalOverflow = true; - forcedScaleOnHive11Decimal = 6; - enableLazyDecoding = false; - } - }; - - RowReaderOptions::RowReaderOptions(): - privateBits(std::unique_ptr<RowReaderOptionsPrivate> - (new RowReaderOptionsPrivate())) { - // PASS - } - - RowReaderOptions::RowReaderOptions(const RowReaderOptions& rhs): - privateBits(std::unique_ptr<RowReaderOptionsPrivate> - (new RowReaderOptionsPrivate(*(rhs.privateBits.get())))) { - // PASS - } - - RowReaderOptions::RowReaderOptions(RowReaderOptions& rhs) { - // swap privateBits with rhs - RowReaderOptionsPrivate* l = privateBits.release(); - privateBits.reset(rhs.privateBits.release()); - rhs.privateBits.reset(l); - } - - 
RowReaderOptions& RowReaderOptions::operator=(const RowReaderOptions& rhs) { - if (this != &rhs) { - privateBits.reset(new RowReaderOptionsPrivate(*(rhs.privateBits.get()))); - } - return *this; - } - - RowReaderOptions::~RowReaderOptions() { - // PASS - } - - RowReaderOptions& RowReaderOptions::include(const std::list<uint64_t>& include) { - privateBits->selection = ColumnSelection_FIELD_IDS; - privateBits->includedColumnIndexes.assign(include.begin(), include.end()); - privateBits->includedColumnNames.clear(); - return *this; - } - - RowReaderOptions& RowReaderOptions::include(const std::list<std::string>& include) { - privateBits->selection = ColumnSelection_NAMES; - privateBits->includedColumnNames.assign(include.begin(), include.end()); - privateBits->includedColumnIndexes.clear(); - return *this; - } - - RowReaderOptions& RowReaderOptions::includeTypes(const std::list<uint64_t>& types) { - privateBits->selection = ColumnSelection_TYPE_IDS; - privateBits->includedColumnIndexes.assign(types.begin(), types.end()); - privateBits->includedColumnNames.clear(); - return *this; - } - - RowReaderOptions& RowReaderOptions::range(uint64_t offset, uint64_t length) { - privateBits->dataStart = offset; - privateBits->dataLength = length; - return *this; - } - - bool RowReaderOptions::getIndexesSet() const { - return privateBits->selection == ColumnSelection_FIELD_IDS; - } - - bool RowReaderOptions::getTypeIdsSet() const { - return privateBits->selection == ColumnSelection_TYPE_IDS; - } - - const std::list<uint64_t>& RowReaderOptions::getInclude() const { - return privateBits->includedColumnIndexes; - } - - bool RowReaderOptions::getNamesSet() const { - return privateBits->selection == ColumnSelection_NAMES; - } - - const std::list<std::string>& RowReaderOptions::getIncludeNames() const { - return privateBits->includedColumnNames; - } - - uint64_t RowReaderOptions::getOffset() const { - return privateBits->dataStart; - } - - uint64_t RowReaderOptions::getLength() const { - return privateBits->dataLength; - } - - RowReaderOptions& RowReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow){ - privateBits->throwOnHive11DecimalOverflow = shouldThrow; - return *this; - } - - bool RowReaderOptions::getThrowOnHive11DecimalOverflow() const { - return privateBits->throwOnHive11DecimalOverflow; - } - - RowReaderOptions& RowReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale - ) { - privateBits->forcedScaleOnHive11Decimal = forcedScale; - return *this; - } - - int32_t RowReaderOptions::getForcedScaleOnHive11Decimal() const { - return privateBits->forcedScaleOnHive11Decimal; - } - - bool RowReaderOptions::getEnableLazyDecoding() const { - return privateBits->enableLazyDecoding; - } - - RowReaderOptions& RowReaderOptions::setEnableLazyDecoding(bool enable) { - privateBits->enableLazyDecoding = enable; - return *this; - } -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_OPTIONS_HH +#define ORC_OPTIONS_HH + +#include "orc/Int128.hh" +#include "orc/OrcFile.hh" +#include "orc/Reader.hh" + +#include <limits> + +namespace orc { + + enum ColumnSelection { + ColumnSelection_NONE = 0, + ColumnSelection_NAMES = 1, + ColumnSelection_FIELD_IDS = 2, + ColumnSelection_TYPE_IDS = 3, + }; + +/** + * ReaderOptions Implementation + */ + struct ReaderOptionsPrivate { + uint64_t tailLocation; + std::ostream* errorStream; + MemoryPool* memoryPool; + std::string serializedTail; + + ReaderOptionsPrivate() { + tailLocation = std::numeric_limits<uint64_t>::max(); + errorStream = &std::cerr; + memoryPool = getDefaultPool(); + } + }; + + ReaderOptions::ReaderOptions(): + privateBits(std::unique_ptr<ReaderOptionsPrivate> + (new ReaderOptionsPrivate())) { + // PASS + } + + ReaderOptions::ReaderOptions(const ReaderOptions& rhs): + privateBits(std::unique_ptr<ReaderOptionsPrivate> + (new ReaderOptionsPrivate(*(rhs.privateBits.get())))) { + // PASS + } + + ReaderOptions::ReaderOptions(ReaderOptions& rhs) { + // swap privateBits with rhs + ReaderOptionsPrivate* l = privateBits.release(); + privateBits.reset(rhs.privateBits.release()); + rhs.privateBits.reset(l); + } + + ReaderOptions& ReaderOptions::operator=(const ReaderOptions& rhs) { + if (this != &rhs) { + privateBits.reset(new ReaderOptionsPrivate(*(rhs.privateBits.get()))); + } + return *this; + } + + ReaderOptions::~ReaderOptions() { + // PASS + } + + ReaderOptions& ReaderOptions::setMemoryPool(MemoryPool& pool) { + privateBits->memoryPool = &pool; + return *this; + } + + MemoryPool* ReaderOptions::getMemoryPool() const{ + return privateBits->memoryPool; + } + + ReaderOptions& ReaderOptions::setTailLocation(uint64_t offset) { + privateBits->tailLocation = offset; + return *this; + } + + uint64_t ReaderOptions::getTailLocation() const { + return privateBits->tailLocation; + } + + ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value + ) { + privateBits->serializedTail = value; + return *this; + } + + std::string ReaderOptions::getSerializedFileTail() const { + return privateBits->serializedTail; + } + + ReaderOptions& ReaderOptions::setErrorStream(std::ostream& stream) { + privateBits->errorStream = &stream; + return *this; + } + + std::ostream* ReaderOptions::getErrorStream() const { + return privateBits->errorStream; + } + +/** + * RowReaderOptions Implementation + */ + + struct RowReaderOptionsPrivate { + ColumnSelection selection; + std::list<uint64_t> includedColumnIndexes; + std::list<std::string> includedColumnNames; + uint64_t dataStart; + uint64_t dataLength; + bool throwOnHive11DecimalOverflow; + int32_t forcedScaleOnHive11Decimal; + bool enableLazyDecoding; + + RowReaderOptionsPrivate() { + selection = ColumnSelection_NONE; + dataStart = 0; + dataLength = std::numeric_limits<uint64_t>::max(); + throwOnHive11DecimalOverflow = true; + forcedScaleOnHive11Decimal = 6; + enableLazyDecoding = false; + } + }; + + RowReaderOptions::RowReaderOptions(): + privateBits(std::unique_ptr<RowReaderOptionsPrivate> + (new RowReaderOptionsPrivate())) { + // PASS + } + + 
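// A minimal configuration sketch for the two option structs above. The setters return a
// reference to the object, so they chain; column and byte-range selection live on
// RowReaderOptions, while memory-pool and error-stream wiring live on ReaderOptions.
// The column names are hypothetical.
#include "orc/Reader.hh"
#include <iostream>
#include <list>
void configureOptions() {
  orc::ReaderOptions readerOpts;
  readerOpts.setMemoryPool(*orc::getDefaultPool())
            .setErrorStream(std::cerr);

  orc::RowReaderOptions rowOpts;
  rowOpts.include(std::list<std::string>{"id", "name"})   // select columns by name
         .range(0, 1024 * 1024)           // limit reading to this byte range of the file
         .setEnableLazyDecoding(true);
}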
RowReaderOptions::RowReaderOptions(const RowReaderOptions& rhs): + privateBits(std::unique_ptr<RowReaderOptionsPrivate> + (new RowReaderOptionsPrivate(*(rhs.privateBits.get())))) { + // PASS + } + + RowReaderOptions::RowReaderOptions(RowReaderOptions& rhs) { + // swap privateBits with rhs + RowReaderOptionsPrivate* l = privateBits.release(); + privateBits.reset(rhs.privateBits.release()); + rhs.privateBits.reset(l); + } + + RowReaderOptions& RowReaderOptions::operator=(const RowReaderOptions& rhs) { + if (this != &rhs) { + privateBits.reset(new RowReaderOptionsPrivate(*(rhs.privateBits.get()))); + } + return *this; + } + + RowReaderOptions::~RowReaderOptions() { + // PASS + } + + RowReaderOptions& RowReaderOptions::include(const std::list<uint64_t>& include) { + privateBits->selection = ColumnSelection_FIELD_IDS; + privateBits->includedColumnIndexes.assign(include.begin(), include.end()); + privateBits->includedColumnNames.clear(); + return *this; + } + + RowReaderOptions& RowReaderOptions::include(const std::list<std::string>& include) { + privateBits->selection = ColumnSelection_NAMES; + privateBits->includedColumnNames.assign(include.begin(), include.end()); + privateBits->includedColumnIndexes.clear(); + return *this; + } + + RowReaderOptions& RowReaderOptions::includeTypes(const std::list<uint64_t>& types) { + privateBits->selection = ColumnSelection_TYPE_IDS; + privateBits->includedColumnIndexes.assign(types.begin(), types.end()); + privateBits->includedColumnNames.clear(); + return *this; + } + + RowReaderOptions& RowReaderOptions::range(uint64_t offset, uint64_t length) { + privateBits->dataStart = offset; + privateBits->dataLength = length; + return *this; + } + + bool RowReaderOptions::getIndexesSet() const { + return privateBits->selection == ColumnSelection_FIELD_IDS; + } + + bool RowReaderOptions::getTypeIdsSet() const { + return privateBits->selection == ColumnSelection_TYPE_IDS; + } + + const std::list<uint64_t>& RowReaderOptions::getInclude() const { + return privateBits->includedColumnIndexes; + } + + bool RowReaderOptions::getNamesSet() const { + return privateBits->selection == ColumnSelection_NAMES; + } + + const std::list<std::string>& RowReaderOptions::getIncludeNames() const { + return privateBits->includedColumnNames; + } + + uint64_t RowReaderOptions::getOffset() const { + return privateBits->dataStart; + } + + uint64_t RowReaderOptions::getLength() const { + return privateBits->dataLength; + } + + RowReaderOptions& RowReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow){ + privateBits->throwOnHive11DecimalOverflow = shouldThrow; + return *this; + } + + bool RowReaderOptions::getThrowOnHive11DecimalOverflow() const { + return privateBits->throwOnHive11DecimalOverflow; + } + + RowReaderOptions& RowReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale + ) { + privateBits->forcedScaleOnHive11Decimal = forcedScale; + return *this; + } + + int32_t RowReaderOptions::getForcedScaleOnHive11Decimal() const { + return privateBits->forcedScaleOnHive11Decimal; + } + + bool RowReaderOptions::getEnableLazyDecoding() const { + return privateBits->enableLazyDecoding; + } + + RowReaderOptions& RowReaderOptions::setEnableLazyDecoding(bool enable) { + privateBits->enableLazyDecoding = enable; + return *this; + } +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/OrcFile.cc b/contrib/libs/apache/orc/c++/src/OrcFile.cc index 5856db692e..a0158bbadf 100644 --- a/contrib/libs/apache/orc/c++/src/OrcFile.cc +++ b/contrib/libs/apache/orc/c++/src/OrcFile.cc @@ -1,184 
+1,184 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "orc/OrcFile.hh" -#include "orc/Exceptions.hh" - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <sys/stat.h> -#include <string.h> - -#ifdef _MSC_VER -#include <io.h> -#define S_IRUSR _S_IREAD -#define S_IWUSR _S_IWRITE +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "orc/OrcFile.hh" +#include "orc/Exceptions.hh" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <string.h> + +#ifdef _MSC_VER +#include <io.h> +#define S_IRUSR _S_IREAD +#define S_IWUSR _S_IWRITE #define stat _stat64 #define fstat _fstat64 -#else -#include <unistd.h> -#define O_BINARY 0 -#endif - -namespace orc { - - class FileInputStream : public InputStream { - private: - std::string filename; - int file; - uint64_t totalLength; - - public: - FileInputStream(std::string _filename) { - filename = _filename; - file = open(filename.c_str(), O_BINARY | O_RDONLY); - if (file == -1) { - throw ParseError("Can't open " + filename); - } - struct stat fileStat; - if (fstat(file, &fileStat) == -1) { - throw ParseError("Can't stat " + filename); - } - totalLength = static_cast<uint64_t>(fileStat.st_size); - } - - ~FileInputStream() override; - - uint64_t getLength() const override { - return totalLength; - } - - uint64_t getNaturalReadSize() const override { - return 128 * 1024; - } - - void read(void* buf, - uint64_t length, - uint64_t offset) override { - if (!buf) { - throw ParseError("Buffer is null"); - } - ssize_t bytesRead = pread(file, buf, length, static_cast<off_t>(offset)); - - if (bytesRead == -1) { - throw ParseError("Bad read of " + filename); - } - if (static_cast<uint64_t>(bytesRead) != length) { - throw ParseError("Short read of " + filename); - } - } - - const std::string& getName() const override { - return filename; - } - }; - - FileInputStream::~FileInputStream() { - close(file); - } - - std::unique_ptr<InputStream> readFile(const std::string& path) { -#ifdef BUILD_LIBHDFSPP - if(strncmp (path.c_str(), "hdfs://", 7) == 0){ - return orc::readHdfsFile(std::string(path)); - } else { -#endif - return orc::readLocalFile(std::string(path)); -#ifdef BUILD_LIBHDFSPP - } -#endif - } - - std::unique_ptr<InputStream> readLocalFile(const std::string& path) { - return std::unique_ptr<InputStream>(new FileInputStream(path)); - } - - OutputStream::~OutputStream() { - // PASS - }; - - class FileOutputStream : public OutputStream { - private: - std::string filename; - int file; - uint64_t bytesWritten; - bool closed; - - public: - FileOutputStream(std::string _filename) { - bytesWritten = 0; - filename = _filename; - closed = false; - file = open( - filename.c_str(), - O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, - S_IRUSR | S_IWUSR); - if (file == -1) { - throw ParseError("Can't open " + filename); - } - } - - ~FileOutputStream() override; - - uint64_t getLength() const override { - return bytesWritten; - } - - uint64_t getNaturalWriteSize() const override { - return 128 * 1024; - } - - void write(const void* buf, size_t length) override { - if (closed) { - throw std::logic_error("Cannot write to closed stream."); - } - ssize_t bytesWrite = ::write(file, buf, length); - if (bytesWrite == -1) { - throw ParseError("Bad write of " + filename); - } - if (static_cast<uint64_t>(bytesWrite) != length) { - throw ParseError("Short write of " + filename); - } - bytesWritten += static_cast<uint64_t>(bytesWrite); - } - - const std::string& getName() const override { - return filename; - } - - void close() override { - if (!closed) { - ::close(file); - closed = true; - } - } - }; - - FileOutputStream::~FileOutputStream() { - if (!closed) { - ::close(file); - closed = true; - } - } - - std::unique_ptr<OutputStream> writeLocalFile(const std::string& path) { - return std::unique_ptr<OutputStream>(new FileOutputStream(path)); - } 
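// A minimal round-trip sketch for the local file streams above: write a few bytes with
// the OutputStream returned by writeLocalFile, then read them back through the
// InputStream returned by readLocalFile. The path is hypothetical.
#include "orc/OrcFile.hh"
#include <memory>
#include <string>
void roundTrip() {
  const std::string path = "/tmp/orc-io-example.bin";

  std::unique_ptr<orc::OutputStream> out = orc::writeLocalFile(path);
  const char payload[] = "hello orc";
  out->write(payload, sizeof(payload));
  out->close();                                   // flush and release the descriptor

  std::unique_ptr<orc::InputStream> in = orc::readLocalFile(path);
  char buffer[sizeof(payload)];
  in->read(buffer, sizeof(buffer), 0);            // throws ParseError on a short read
}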
-} +#else +#include <unistd.h> +#define O_BINARY 0 +#endif + +namespace orc { + + class FileInputStream : public InputStream { + private: + std::string filename; + int file; + uint64_t totalLength; + + public: + FileInputStream(std::string _filename) { + filename = _filename; + file = open(filename.c_str(), O_BINARY | O_RDONLY); + if (file == -1) { + throw ParseError("Can't open " + filename); + } + struct stat fileStat; + if (fstat(file, &fileStat) == -1) { + throw ParseError("Can't stat " + filename); + } + totalLength = static_cast<uint64_t>(fileStat.st_size); + } + + ~FileInputStream() override; + + uint64_t getLength() const override { + return totalLength; + } + + uint64_t getNaturalReadSize() const override { + return 128 * 1024; + } + + void read(void* buf, + uint64_t length, + uint64_t offset) override { + if (!buf) { + throw ParseError("Buffer is null"); + } + ssize_t bytesRead = pread(file, buf, length, static_cast<off_t>(offset)); + + if (bytesRead == -1) { + throw ParseError("Bad read of " + filename); + } + if (static_cast<uint64_t>(bytesRead) != length) { + throw ParseError("Short read of " + filename); + } + } + + const std::string& getName() const override { + return filename; + } + }; + + FileInputStream::~FileInputStream() { + close(file); + } + + std::unique_ptr<InputStream> readFile(const std::string& path) { +#ifdef BUILD_LIBHDFSPP + if(strncmp (path.c_str(), "hdfs://", 7) == 0){ + return orc::readHdfsFile(std::string(path)); + } else { +#endif + return orc::readLocalFile(std::string(path)); +#ifdef BUILD_LIBHDFSPP + } +#endif + } + + std::unique_ptr<InputStream> readLocalFile(const std::string& path) { + return std::unique_ptr<InputStream>(new FileInputStream(path)); + } + + OutputStream::~OutputStream() { + // PASS + }; + + class FileOutputStream : public OutputStream { + private: + std::string filename; + int file; + uint64_t bytesWritten; + bool closed; + + public: + FileOutputStream(std::string _filename) { + bytesWritten = 0; + filename = _filename; + closed = false; + file = open( + filename.c_str(), + O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, + S_IRUSR | S_IWUSR); + if (file == -1) { + throw ParseError("Can't open " + filename); + } + } + + ~FileOutputStream() override; + + uint64_t getLength() const override { + return bytesWritten; + } + + uint64_t getNaturalWriteSize() const override { + return 128 * 1024; + } + + void write(const void* buf, size_t length) override { + if (closed) { + throw std::logic_error("Cannot write to closed stream."); + } + ssize_t bytesWrite = ::write(file, buf, length); + if (bytesWrite == -1) { + throw ParseError("Bad write of " + filename); + } + if (static_cast<uint64_t>(bytesWrite) != length) { + throw ParseError("Short write of " + filename); + } + bytesWritten += static_cast<uint64_t>(bytesWrite); + } + + const std::string& getName() const override { + return filename; + } + + void close() override { + if (!closed) { + ::close(file); + closed = true; + } + } + }; + + FileOutputStream::~FileOutputStream() { + if (!closed) { + ::close(file); + closed = true; + } + } + + std::unique_ptr<OutputStream> writeLocalFile(const std::string& path) { + return std::unique_ptr<OutputStream>(new FileOutputStream(path)); + } +} diff --git a/contrib/libs/apache/orc/c++/src/RLE.cc b/contrib/libs/apache/orc/c++/src/RLE.cc index ea0181deaf..21f9082216 100644 --- a/contrib/libs/apache/orc/c++/src/RLE.cc +++ b/contrib/libs/apache/orc/c++/src/RLE.cc @@ -1,121 +1,121 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more 
contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#include "RLEv1.hh" -#include "RLEv2.hh" -#include "orc/Exceptions.hh" - -namespace orc { - - RleEncoder::~RleEncoder() { - // PASS - } - - RleDecoder::~RleDecoder() { - // PASS - } - - std::unique_ptr<RleEncoder> createRleEncoder - (std::unique_ptr<BufferedOutputStream> output, - bool isSigned, - RleVersion version, - MemoryPool&, - bool alignedBitpacking) { - switch (static_cast<int64_t>(version)) { - case RleVersion_1: - // We don't have std::make_unique() yet. - return std::unique_ptr<RleEncoder>(new RleEncoderV1(std::move(output), - isSigned)); - case RleVersion_2: - return std::unique_ptr<RleEncoder>(new RleEncoderV2(std::move(output), - isSigned, alignedBitpacking)); - default: - throw NotImplementedYet("Not implemented yet"); - } - } - - std::unique_ptr<RleDecoder> createRleDecoder - (std::unique_ptr<SeekableInputStream> input, - bool isSigned, - RleVersion version, - MemoryPool& pool) { - switch (static_cast<int64_t>(version)) { - case RleVersion_1: - // We don't have std::make_unique() yet. - return std::unique_ptr<RleDecoder>(new RleDecoderV1(std::move(input), - isSigned)); - case RleVersion_2: - return std::unique_ptr<RleDecoder>(new RleDecoderV2(std::move(input), - isSigned, pool)); - default: - throw NotImplementedYet("Not implemented yet"); - } - } - - void RleEncoder::add(const int64_t* data, uint64_t numValues, - const char* notNull) { - for (uint64_t i = 0; i < numValues; ++i) { - if (!notNull || notNull[i]) { - write(data[i]); - } - } - } - - void RleEncoder::writeVslong(int64_t val) { - writeVulong((val << 1) ^ (val >> 63)); - } - - void RleEncoder::writeVulong(int64_t val) { - while (true) { - if ((val & ~0x7f) == 0) { - writeByte(static_cast<char>(val)); - return; - } else { - writeByte(static_cast<char>(0x80 | (val & 0x7f))); - // cast val to unsigned so as to force 0-fill right shift - val = (static_cast<uint64_t>(val) >> 7); - } - } - } - - void RleEncoder::writeByte(char c) { - if (bufferPosition == bufferLength) { - int addedSize = 0; - if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) { - throw std::bad_alloc(); - } - bufferPosition = 0; - bufferLength = static_cast<size_t>(addedSize); - } - buffer[bufferPosition++] = c; - } - - void RleEncoder::recordPosition(PositionRecorder* recorder) const { - uint64_t flushedSize = outputStream->getSize(); - uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition); - if (outputStream->isCompressed()) { - recorder->add(flushedSize); - recorder->add(unflushedSize); - } else { - flushedSize -= static_cast<uint64_t>(bufferLength); - recorder->add(flushedSize + unflushedSize); - } - recorder->add(static_cast<uint64_t>(numLiterals)); - } - -} // namespace orc +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. 
See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "RLEv1.hh" +#include "RLEv2.hh" +#include "orc/Exceptions.hh" + +namespace orc { + + RleEncoder::~RleEncoder() { + // PASS + } + + RleDecoder::~RleDecoder() { + // PASS + } + + std::unique_ptr<RleEncoder> createRleEncoder + (std::unique_ptr<BufferedOutputStream> output, + bool isSigned, + RleVersion version, + MemoryPool&, + bool alignedBitpacking) { + switch (static_cast<int64_t>(version)) { + case RleVersion_1: + // We don't have std::make_unique() yet. + return std::unique_ptr<RleEncoder>(new RleEncoderV1(std::move(output), + isSigned)); + case RleVersion_2: + return std::unique_ptr<RleEncoder>(new RleEncoderV2(std::move(output), + isSigned, alignedBitpacking)); + default: + throw NotImplementedYet("Not implemented yet"); + } + } + + std::unique_ptr<RleDecoder> createRleDecoder + (std::unique_ptr<SeekableInputStream> input, + bool isSigned, + RleVersion version, + MemoryPool& pool) { + switch (static_cast<int64_t>(version)) { + case RleVersion_1: + // We don't have std::make_unique() yet. + return std::unique_ptr<RleDecoder>(new RleDecoderV1(std::move(input), + isSigned)); + case RleVersion_2: + return std::unique_ptr<RleDecoder>(new RleDecoderV2(std::move(input), + isSigned, pool)); + default: + throw NotImplementedYet("Not implemented yet"); + } + } + + void RleEncoder::add(const int64_t* data, uint64_t numValues, + const char* notNull) { + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + write(data[i]); + } + } + } + + void RleEncoder::writeVslong(int64_t val) { + writeVulong((val << 1) ^ (val >> 63)); + } + + void RleEncoder::writeVulong(int64_t val) { + while (true) { + if ((val & ~0x7f) == 0) { + writeByte(static_cast<char>(val)); + return; + } else { + writeByte(static_cast<char>(0x80 | (val & 0x7f))); + // cast val to unsigned so as to force 0-fill right shift + val = (static_cast<uint64_t>(val) >> 7); + } + } + } + + void RleEncoder::writeByte(char c) { + if (bufferPosition == bufferLength) { + int addedSize = 0; + if (!outputStream->Next(reinterpret_cast<void **>(&buffer), &addedSize)) { + throw std::bad_alloc(); + } + bufferPosition = 0; + bufferLength = static_cast<size_t>(addedSize); + } + buffer[bufferPosition++] = c; + } + + void RleEncoder::recordPosition(PositionRecorder* recorder) const { + uint64_t flushedSize = outputStream->getSize(); + uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition); + if (outputStream->isCompressed()) { + recorder->add(flushedSize); + recorder->add(unflushedSize); + } else { + flushedSize -= static_cast<uint64_t>(bufferLength); + recorder->add(flushedSize + unflushedSize); + } + recorder->add(static_cast<uint64_t>(numLiterals)); + } + +} // namespace orc diff --git a/contrib/libs/apache/orc/c++/src/RLE.hh b/contrib/libs/apache/orc/c++/src/RLE.hh index ec0330559e..6822bd812e 100644 --- 
a/contrib/libs/apache/orc/c++/src/RLE.hh +++ b/contrib/libs/apache/orc/c++/src/RLE.hh @@ -1,155 +1,155 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_RLE_HH -#define ORC_RLE_HH - -#include "io/InputStream.hh" -#include "io/OutputStream.hh" - -#include <memory> - -namespace orc { - - inline int64_t zigZag(int64_t value) { - return (value << 1) ^ (value >> 63); - } - - inline int64_t unZigZag(uint64_t value) { - return value >> 1 ^ -(value & 1); - } - - class RleEncoder { - public: - // must be non-inline! - virtual ~RleEncoder(); - - RleEncoder( - std::unique_ptr<BufferedOutputStream> outStream, - bool hasSigned): - outputStream(std::move(outStream)), - bufferPosition(0), - bufferLength(0), - numLiterals(0), - isSigned(hasSigned), - buffer(nullptr){ - //pass - } - - /** - * Encode the next batch of values. - * @param data the array to read from - * @param numValues the number of values to write - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void add(const int64_t* data, uint64_t numValues, - const char* notNull); - - /** - * Get size of buffer used so far. - */ - uint64_t getBufferSize() const { - return outputStream->getSize(); - } - - /** - * Flushing underlying BufferedOutputStream - */ - virtual uint64_t flush() = 0; - - /** - * record current position - * @param recorder use the recorder to record current positions - */ - virtual void recordPosition(PositionRecorder* recorder) const; - - virtual void write(int64_t val) = 0; - - protected: - std::unique_ptr<BufferedOutputStream> outputStream; - size_t bufferPosition; - size_t bufferLength; - size_t numLiterals; - int64_t* literals; - bool isSigned; - char* buffer; - - virtual void writeByte(char c); - - virtual void writeVulong(int64_t val); - - virtual void writeVslong(int64_t val); - }; - - class RleDecoder { - public: - // must be non-inline! - virtual ~RleDecoder(); - - /** - * Seek to a particular spot. - */ - virtual void seek(PositionProvider&) = 0; - - /** - * Seek over a given number of values. - */ - virtual void skip(uint64_t numValues) = 0; - - /** - * Read a number of values into the batch. - * @param data the array to read into - * @param numValues the number of values to read - * @param notNull If the pointer is null, all values are read. If the - * pointer is not null, positions that are false are skipped. - */ - virtual void next(int64_t* data, uint64_t numValues, - const char* notNull) = 0; - }; - - /** - * Create an RLE encoder. 
- * @param output the output stream to write to - * @param isSigned true if the number sequence is signed - * @param version version of RLE decoding to do - * @param pool memory pool to use for allocation - */ - std::unique_ptr<RleEncoder> createRleEncoder - (std::unique_ptr<BufferedOutputStream> output, - bool isSigned, - RleVersion version, - MemoryPool& pool, - bool alignedBitpacking); - - /** - * Create an RLE decoder. - * @param input the input stream to read from - * @param isSigned true if the number sequence is signed - * @param version version of RLE decoding to do - * @param pool memory pool to use for allocation - */ - std::unique_ptr<RleDecoder> createRleDecoder - (std::unique_ptr<SeekableInputStream> input, - bool isSigned, - RleVersion version, - MemoryPool& pool); - -} // namespace orc - -#endif // ORC_RLE_HH +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_RLE_HH +#define ORC_RLE_HH + +#include "io/InputStream.hh" +#include "io/OutputStream.hh" + +#include <memory> + +namespace orc { + + inline int64_t zigZag(int64_t value) { + return (value << 1) ^ (value >> 63); + } + + inline int64_t unZigZag(uint64_t value) { + return value >> 1 ^ -(value & 1); + } + + class RleEncoder { + public: + // must be non-inline! + virtual ~RleEncoder(); + + RleEncoder( + std::unique_ptr<BufferedOutputStream> outStream, + bool hasSigned): + outputStream(std::move(outStream)), + bufferPosition(0), + bufferLength(0), + numLiterals(0), + isSigned(hasSigned), + buffer(nullptr){ + //pass + } + + /** + * Encode the next batch of values. + * @param data the array to read from + * @param numValues the number of values to write + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void add(const int64_t* data, uint64_t numValues, + const char* notNull); + + /** + * Get size of buffer used so far. + */ + uint64_t getBufferSize() const { + return outputStream->getSize(); + } + + /** + * Flushing underlying BufferedOutputStream + */ + virtual uint64_t flush() = 0; + + /** + * record current position + * @param recorder use the recorder to record current positions + */ + virtual void recordPosition(PositionRecorder* recorder) const; + + virtual void write(int64_t val) = 0; + + protected: + std::unique_ptr<BufferedOutputStream> outputStream; + size_t bufferPosition; + size_t bufferLength; + size_t numLiterals; + int64_t* literals; + bool isSigned; + char* buffer; + + virtual void writeByte(char c); + + virtual void writeVulong(int64_t val); + + virtual void writeVslong(int64_t val); + }; + + class RleDecoder { + public: + // must be non-inline! + virtual ~RleDecoder(); + + /** + * Seek to a particular spot. 
+ */ + virtual void seek(PositionProvider&) = 0; + + /** + * Seek over a given number of values. + */ + virtual void skip(uint64_t numValues) = 0; + + /** + * Read a number of values into the batch. + * @param data the array to read into + * @param numValues the number of values to read + * @param notNull If the pointer is null, all values are read. If the + * pointer is not null, positions that are false are skipped. + */ + virtual void next(int64_t* data, uint64_t numValues, + const char* notNull) = 0; + }; + + /** + * Create an RLE encoder. + * @param output the output stream to write to + * @param isSigned true if the number sequence is signed + * @param version version of RLE decoding to do + * @param pool memory pool to use for allocation + */ + std::unique_ptr<RleEncoder> createRleEncoder + (std::unique_ptr<BufferedOutputStream> output, + bool isSigned, + RleVersion version, + MemoryPool& pool, + bool alignedBitpacking); + + /** + * Create an RLE decoder. + * @param input the input stream to read from + * @param isSigned true if the number sequence is signed + * @param version version of RLE decoding to do + * @param pool memory pool to use for allocation + */ + std::unique_ptr<RleDecoder> createRleDecoder + (std::unique_ptr<SeekableInputStream> input, + bool isSigned, + RleVersion version, + MemoryPool& pool); + +} // namespace orc + +#endif // ORC_RLE_HH diff --git a/contrib/libs/apache/orc/c++/src/RLEV2Util.cc b/contrib/libs/apache/orc/c++/src/RLEV2Util.cc index 20fc0931ef..12e2d057cd 100644 --- a/contrib/libs/apache/orc/c++/src/RLEV2Util.cc +++ b/contrib/libs/apache/orc/c++/src/RLEV2Util.cc @@ -1,70 +1,70 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with option work for additional information - * regarding copyright ownership. The ASF licenses option file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use option file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RLEV2Util.hh" - -namespace orc { - - // Map FBS enum to bit width value. - const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 26, 28, 30, 32, 40, 48, 56, 64 - }; - - // Map bit length i to closest fixed bit width that can contain i bits. - const uint8_t ClosestFixedBitsMap[65] = { - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 26, 26, 28, 28, 30, 30, 32, 32, - 40, 40, 40, 40, 40, 40, 40, 40, - 48, 48, 48, 48, 48, 48, 48, 48, - 56, 56, 56, 56, 56, 56, 56, 56, - 64, 64, 64, 64, 64, 64, 64, 64 - }; - - // Map bit length i to closest aligned fixed bit width that can contain i bits. 
- const uint8_t ClosestAlignedFixedBitsMap[65] = { - 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, - 32, 32, 32, 32, 32, 32, 32, 32, - 40, 40, 40, 40, 40, 40, 40, 40, - 48, 48, 48, 48, 48, 48, 48, 48, - 56, 56, 56, 56, 56, 56, 56, 56, - 64, 64, 64, 64, 64, 64, 64, 64 - }; - - // Map bit width to FBS enum. - const uint8_t BitWidthToFBSMap[65] = { - FixedBitSizes::ONE, FixedBitSizes::ONE, FixedBitSizes::TWO, FixedBitSizes::THREE, FixedBitSizes::FOUR, - FixedBitSizes::FIVE, FixedBitSizes::SIX, FixedBitSizes::SEVEN, FixedBitSizes::EIGHT, - FixedBitSizes::NINE, FixedBitSizes::TEN, FixedBitSizes::ELEVEN, FixedBitSizes::TWELVE, - FixedBitSizes::THIRTEEN, FixedBitSizes::FOURTEEN, FixedBitSizes::FIFTEEN, FixedBitSizes::SIXTEEN, - FixedBitSizes::SEVENTEEN, FixedBitSizes::EIGHTEEN, FixedBitSizes::NINETEEN, FixedBitSizes::TWENTY, - FixedBitSizes::TWENTYONE, FixedBitSizes::TWENTYTWO, FixedBitSizes::TWENTYTHREE, FixedBitSizes::TWENTYFOUR, - FixedBitSizes::TWENTYSIX, FixedBitSizes::TWENTYSIX, - FixedBitSizes::TWENTYEIGHT, FixedBitSizes::TWENTYEIGHT, - FixedBitSizes::THIRTY, FixedBitSizes::THIRTY, - FixedBitSizes::THIRTYTWO, FixedBitSizes::THIRTYTWO, - FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, - FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, - FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, - FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, - FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, - FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, - FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, - FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR - }; -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with option work for additional information + * regarding copyright ownership. The ASF licenses option file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use option file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RLEV2Util.hh" + +namespace orc { + + // Map FBS enum to bit width value. + const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 26, 28, 30, 32, 40, 48, 56, 64 + }; + + // Map bit length i to closest fixed bit width that can contain i bits. 
+ const uint8_t ClosestFixedBitsMap[65] = { + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 26, 26, 28, 28, 30, 30, 32, 32, + 40, 40, 40, 40, 40, 40, 40, 40, + 48, 48, 48, 48, 48, 48, 48, 48, + 56, 56, 56, 56, 56, 56, 56, 56, + 64, 64, 64, 64, 64, 64, 64, 64 + }; + + // Map bit length i to closest aligned fixed bit width that can contain i bits. + const uint8_t ClosestAlignedFixedBitsMap[65] = { + 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, + 32, 32, 32, 32, 32, 32, 32, 32, + 40, 40, 40, 40, 40, 40, 40, 40, + 48, 48, 48, 48, 48, 48, 48, 48, + 56, 56, 56, 56, 56, 56, 56, 56, + 64, 64, 64, 64, 64, 64, 64, 64 + }; + + // Map bit width to FBS enum. + const uint8_t BitWidthToFBSMap[65] = { + FixedBitSizes::ONE, FixedBitSizes::ONE, FixedBitSizes::TWO, FixedBitSizes::THREE, FixedBitSizes::FOUR, + FixedBitSizes::FIVE, FixedBitSizes::SIX, FixedBitSizes::SEVEN, FixedBitSizes::EIGHT, + FixedBitSizes::NINE, FixedBitSizes::TEN, FixedBitSizes::ELEVEN, FixedBitSizes::TWELVE, + FixedBitSizes::THIRTEEN, FixedBitSizes::FOURTEEN, FixedBitSizes::FIFTEEN, FixedBitSizes::SIXTEEN, + FixedBitSizes::SEVENTEEN, FixedBitSizes::EIGHTEEN, FixedBitSizes::NINETEEN, FixedBitSizes::TWENTY, + FixedBitSizes::TWENTYONE, FixedBitSizes::TWENTYTWO, FixedBitSizes::TWENTYTHREE, FixedBitSizes::TWENTYFOUR, + FixedBitSizes::TWENTYSIX, FixedBitSizes::TWENTYSIX, + FixedBitSizes::TWENTYEIGHT, FixedBitSizes::TWENTYEIGHT, + FixedBitSizes::THIRTY, FixedBitSizes::THIRTY, + FixedBitSizes::THIRTYTWO, FixedBitSizes::THIRTYTWO, + FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, + FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY, + FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, + FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT, + FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, + FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX, + FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, + FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR + }; +} diff --git a/contrib/libs/apache/orc/c++/src/RLEV2Util.hh b/contrib/libs/apache/orc/c++/src/RLEV2Util.hh index 67a94c7c48..95a6826eaa 100644 --- a/contrib/libs/apache/orc/c++/src/RLEV2Util.hh +++ b/contrib/libs/apache/orc/c++/src/RLEV2Util.hh @@ -1,81 +1,81 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#ifndef ORC_RLEV2UTIL_HH -#define ORC_RLEV2UTIL_HH - -#include "RLEv2.hh" - -namespace orc { - extern const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE]; - extern const uint8_t ClosestFixedBitsMap[65]; - extern const uint8_t ClosestAlignedFixedBitsMap[65]; - extern const uint8_t BitWidthToFBSMap[65]; - - // The input n must be less than FixedBitSizes::SIZE. - inline uint32_t decodeBitWidth(uint32_t n) { - return FBSToBitWidthMap[n]; - } - - inline uint32_t getClosestFixedBits(uint32_t n) { - if (n <= 64) { - return ClosestFixedBitsMap[n]; - } else { - return 64; - } - } - - inline uint32_t getClosestAlignedFixedBits(uint32_t n) { - if (n <= 64) { - return ClosestAlignedFixedBitsMap[n]; - } else { - return 64; - } - } - - inline uint32_t encodeBitWidth(uint32_t n) { - if (n <= 64) { - return BitWidthToFBSMap[n]; - } else { - return FixedBitSizes::SIXTYFOUR; - } - } - - inline uint32_t findClosestNumBits(int64_t value) { - if (value < 0) { - return getClosestFixedBits(64); - } - - uint32_t count = 0; - while (value != 0) { - count++; - value = value >> 1; - } - return getClosestFixedBits(count); - } - - inline bool isSafeSubtract(int64_t left, int64_t right) { - return ((left ^ right) >= 0) || ((left ^ (left - right)) >= 0); - } - - inline uint32_t RleEncoderV2::getOpCode(EncodingType encoding) { - return static_cast<uint32_t >(encoding << 6); - } -} - -#endif //ORC_RLEV2UTIL_HH +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#ifndef ORC_RLEV2UTIL_HH +#define ORC_RLEV2UTIL_HH + +#include "RLEv2.hh" + +namespace orc { + extern const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE]; + extern const uint8_t ClosestFixedBitsMap[65]; + extern const uint8_t ClosestAlignedFixedBitsMap[65]; + extern const uint8_t BitWidthToFBSMap[65]; + + // The input n must be less than FixedBitSizes::SIZE. 
+ inline uint32_t decodeBitWidth(uint32_t n) { + return FBSToBitWidthMap[n]; + } + + inline uint32_t getClosestFixedBits(uint32_t n) { + if (n <= 64) { + return ClosestFixedBitsMap[n]; + } else { + return 64; + } + } + + inline uint32_t getClosestAlignedFixedBits(uint32_t n) { + if (n <= 64) { + return ClosestAlignedFixedBitsMap[n]; + } else { + return 64; + } + } + + inline uint32_t encodeBitWidth(uint32_t n) { + if (n <= 64) { + return BitWidthToFBSMap[n]; + } else { + return FixedBitSizes::SIXTYFOUR; + } + } + + inline uint32_t findClosestNumBits(int64_t value) { + if (value < 0) { + return getClosestFixedBits(64); + } + + uint32_t count = 0; + while (value != 0) { + count++; + value = value >> 1; + } + return getClosestFixedBits(count); + } + + inline bool isSafeSubtract(int64_t left, int64_t right) { + return ((left ^ right) >= 0) || ((left ^ (left - right)) >= 0); + } + + inline uint32_t RleEncoderV2::getOpCode(EncodingType encoding) { + return static_cast<uint32_t >(encoding << 6); + } +} + +#endif //ORC_RLEV2UTIL_HH diff --git a/contrib/libs/apache/orc/c++/src/RLEv1.cc b/contrib/libs/apache/orc/c++/src/RLEv1.cc index aae9726bf6..fe333978db 100644 --- a/contrib/libs/apache/orc/c++/src/RLEv1.cc +++ b/contrib/libs/apache/orc/c++/src/RLEv1.cc @@ -1,302 +1,302 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Adaptor.hh" -#include "Compression.hh" -#include "orc/Exceptions.hh" -#include "RLEv1.hh" - -#include <algorithm> - -namespace orc { - -const uint64_t MINIMUM_REPEAT = 3; -const uint64_t MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT; - -const int64_t BASE_128_MASK = 0x7f; - -const int64_t MAX_DELTA = 127; -const int64_t MIN_DELTA = -128; -const uint64_t MAX_LITERAL_SIZE = 128; - -RleEncoderV1::RleEncoderV1( - std::unique_ptr<BufferedOutputStream> outStream, - bool hasSigned): - RleEncoder(std::move(outStream), hasSigned) { - literals = new int64_t[MAX_LITERAL_SIZE]; - delta = 0; - repeat = false; - tailRunLength = 0; -} - -RleEncoderV1::~RleEncoderV1() { - delete [] literals; -} - -void RleEncoderV1::writeValues() { - if (numLiterals != 0) { - if (repeat) { - writeByte(static_cast<char> - (static_cast<uint64_t>(numLiterals) - MINIMUM_REPEAT)); - writeByte(static_cast<char>(delta)); - if (isSigned) { - writeVslong(literals[0]); - } else { - writeVulong(literals[0]); - } - } else { - writeByte(static_cast<char>(-numLiterals)); - for(size_t i=0; i < numLiterals; ++i) { - if (isSigned) { - writeVslong(literals[i]); - } else { - writeVulong(literals[i]); - } - } - } - repeat = false; - numLiterals = 0; - tailRunLength = 0; - } -} - -uint64_t RleEncoderV1::flush() { - writeValues(); - outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition)); - uint64_t dataSize = outputStream->flush(); - bufferLength = bufferPosition = 0; - return dataSize; -} - -void RleEncoderV1::write(int64_t value) { - if (numLiterals == 0) { - literals[numLiterals++] = value; - tailRunLength = 1; - } else if (repeat) { - if (value == literals[0] + delta * static_cast<int64_t>(numLiterals)) { - numLiterals += 1; - if (numLiterals == MAXIMUM_REPEAT) { - writeValues(); - } - } else { - writeValues(); - literals[numLiterals++] = value; - tailRunLength = 1; - } - } else { - if (tailRunLength == 1) { - delta = value - literals[numLiterals - 1]; - if (delta < MIN_DELTA || delta > MAX_DELTA) { - tailRunLength = 1; - } else { - tailRunLength = 2; - } - } else if (value == literals[numLiterals - 1] + delta) { - tailRunLength += 1; - } else { - delta = value - literals[numLiterals - 1]; - if (delta < MIN_DELTA || delta > MAX_DELTA) { - tailRunLength = 1; - } else { - tailRunLength = 2; - } - } - if (tailRunLength == MINIMUM_REPEAT) { - if (numLiterals + 1 == MINIMUM_REPEAT) { - repeat = true; - numLiterals += 1; - } else { - numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1); - int64_t base = literals[numLiterals]; - writeValues(); - literals[0] = base; - repeat = true; - numLiterals = MINIMUM_REPEAT; - } - } else { - literals[numLiterals++] = value; - if (numLiterals == MAX_LITERAL_SIZE) { - writeValues(); - } - } - } -} - -signed char RleDecoderV1::readByte() { - if (bufferStart == bufferEnd) { - int bufferLength; - const void* bufferPointer; - if (!inputStream->Next(&bufferPointer, &bufferLength)) { - throw ParseError("bad read in readByte"); - } - bufferStart = static_cast<const char*>(bufferPointer); - bufferEnd = bufferStart + bufferLength; - } - return *(bufferStart++); -} - -uint64_t RleDecoderV1::readLong() { - uint64_t result = 0; - int64_t offset = 0; - signed char ch = readByte(); - if (ch >= 0) { - result = static_cast<uint64_t>(ch); - } else { - result = static_cast<uint64_t>(ch) & BASE_128_MASK; - while ((ch = readByte()) < 0) { - offset += 7; - result |= (static_cast<uint64_t>(ch) & BASE_128_MASK) << offset; - } - result |= static_cast<uint64_t>(ch) << (offset + 7); - } - return result; -} - 
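/*
 * Editorial sketch, not part of the ORC sources or of this commit's diff:
 * a minimal, self-contained illustration of the base-128 varint plus zigzag
 * scheme that writeVulong/writeVslong (encoder side) and readLong (decoder
 * side, above) implement. The names encodeVulong/decodeVulong are
 * hypothetical; stream buffering and error handling are deliberately omitted.
 */
#include <cassert>
#include <cstdint>
#include <vector>

static void encodeVulong(uint64_t value, std::vector<uint8_t>& out) {
  // Emit 7 bits per byte, low bits first; the high bit marks "more bytes follow".
  while (value >= 0x80) {
    out.push_back(static_cast<uint8_t>(0x80 | (value & 0x7f)));
    value >>= 7;
  }
  out.push_back(static_cast<uint8_t>(value));
}

static uint64_t decodeVulong(const std::vector<uint8_t>& in, size_t& pos) {
  uint64_t result = 0;
  uint32_t shift = 0;
  while (true) {
    uint8_t byte = in[pos++];
    result |= static_cast<uint64_t>(byte & 0x7f) << shift;
    if ((byte & 0x80) == 0) {
      break;  // high bit clear: this was the last byte of the value
    }
    shift += 7;
  }
  return result;
}

int main() {
  // Zigzag maps signed values to unsigned ones so small magnitudes encode
  // in few bytes: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
  int64_t value = -123456;
  uint64_t zz = (static_cast<uint64_t>(value) << 1) ^
                static_cast<uint64_t>(value >> 63);

  std::vector<uint8_t> bytes;
  encodeVulong(zz, bytes);

  size_t pos = 0;
  uint64_t decoded = decodeVulong(bytes, pos);
  // Undo the zigzag mapping to recover the signed value.
  int64_t restored = static_cast<int64_t>(decoded >> 1) ^
                     -static_cast<int64_t>(decoded & 1);
  assert(restored == value);
  return 0;
}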
-void RleDecoderV1::skipLongs(uint64_t numValues) { - while (numValues > 0) { - if (readByte() >= 0) { - --numValues; - } - } -} - -void RleDecoderV1::readHeader() { - signed char ch = readByte(); - if (ch < 0) { - remainingValues = static_cast<uint64_t>(-ch); - repeating = false; - } else { - remainingValues = static_cast<uint64_t>(ch) + MINIMUM_REPEAT; - repeating = true; - delta = readByte(); - value = isSigned - ? unZigZag(readLong()) - : static_cast<int64_t>(readLong()); - } -} - -RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input, - bool hasSigned) - : inputStream(std::move(input)), - isSigned(hasSigned), - remainingValues(0), - value(0), - bufferStart(nullptr), - bufferEnd(bufferStart), - delta(0), - repeating(false) { -} - -void RleDecoderV1::seek(PositionProvider& location) { - // move the input stream - inputStream->seek(location); - // force a re-read from the stream - bufferEnd = bufferStart; - // read a new header - readHeader(); - // skip ahead the given number of records - skip(location.next()); -} - -void RleDecoderV1::skip(uint64_t numValues) { - while (numValues > 0) { - if (remainingValues == 0) { - readHeader(); - } - uint64_t count = std::min(numValues, remainingValues); - remainingValues -= count; - numValues -= count; - if (repeating) { - value += delta * static_cast<int64_t>(count); - } else { - skipLongs(count); - } - } -} - -void RleDecoderV1::next(int64_t* const data, - const uint64_t numValues, - const char* const notNull) { - uint64_t position = 0; - // skipNulls() - if (notNull) { - // Skip over null values. - while (position < numValues && !notNull[position]) { - ++position; - } - } - while (position < numValues) { - // If we are out of values, read more. - if (remainingValues == 0) { - readHeader(); - } - // How many do we read out of this block? - uint64_t count = std::min(numValues - position, remainingValues); - uint64_t consumed = 0; - if (repeating) { - if (notNull) { - for (uint64_t i = 0; i < count; ++i) { - if (notNull[position + i]) { - data[position + i] = value + static_cast<int64_t>(consumed) * delta; - consumed += 1; - } - } - } else { - for (uint64_t i = 0; i < count; ++i) { - data[position + i] = value + static_cast<int64_t>(i) * delta; - } - consumed = count; - } - value += static_cast<int64_t>(consumed) * delta; - } else { - if (notNull) { - for (uint64_t i = 0 ; i < count; ++i) { - if (notNull[position + i]) { - data[position + i] = isSigned - ? unZigZag(readLong()) - : static_cast<int64_t>(readLong()); - ++consumed; - } - } - } else { - if (isSigned) { - for (uint64_t i = 0; i < count; ++i) { - data[position + i] = unZigZag(readLong()); - } - } else { - for (uint64_t i = 0; i < count; ++i) { - data[position + i] = static_cast<int64_t>(readLong()); - } - } - consumed = count; - } - } - remainingValues -= consumed; - position += count; - - // skipNulls() - if (notNull) { - // Skip over null values. - while (position < numValues && !notNull[position]) { - ++position; - } - } - } -} - -} // namespace orc +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "orc/Exceptions.hh" +#include "RLEv1.hh" + +#include <algorithm> + +namespace orc { + +const uint64_t MINIMUM_REPEAT = 3; +const uint64_t MAXIMUM_REPEAT = 127 + MINIMUM_REPEAT; + +const int64_t BASE_128_MASK = 0x7f; + +const int64_t MAX_DELTA = 127; +const int64_t MIN_DELTA = -128; +const uint64_t MAX_LITERAL_SIZE = 128; + +RleEncoderV1::RleEncoderV1( + std::unique_ptr<BufferedOutputStream> outStream, + bool hasSigned): + RleEncoder(std::move(outStream), hasSigned) { + literals = new int64_t[MAX_LITERAL_SIZE]; + delta = 0; + repeat = false; + tailRunLength = 0; +} + +RleEncoderV1::~RleEncoderV1() { + delete [] literals; +} + +void RleEncoderV1::writeValues() { + if (numLiterals != 0) { + if (repeat) { + writeByte(static_cast<char> + (static_cast<uint64_t>(numLiterals) - MINIMUM_REPEAT)); + writeByte(static_cast<char>(delta)); + if (isSigned) { + writeVslong(literals[0]); + } else { + writeVulong(literals[0]); + } + } else { + writeByte(static_cast<char>(-numLiterals)); + for(size_t i=0; i < numLiterals; ++i) { + if (isSigned) { + writeVslong(literals[i]); + } else { + writeVulong(literals[i]); + } + } + } + repeat = false; + numLiterals = 0; + tailRunLength = 0; + } +} + +uint64_t RleEncoderV1::flush() { + writeValues(); + outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition)); + uint64_t dataSize = outputStream->flush(); + bufferLength = bufferPosition = 0; + return dataSize; +} + +void RleEncoderV1::write(int64_t value) { + if (numLiterals == 0) { + literals[numLiterals++] = value; + tailRunLength = 1; + } else if (repeat) { + if (value == literals[0] + delta * static_cast<int64_t>(numLiterals)) { + numLiterals += 1; + if (numLiterals == MAXIMUM_REPEAT) { + writeValues(); + } + } else { + writeValues(); + literals[numLiterals++] = value; + tailRunLength = 1; + } + } else { + if (tailRunLength == 1) { + delta = value - literals[numLiterals - 1]; + if (delta < MIN_DELTA || delta > MAX_DELTA) { + tailRunLength = 1; + } else { + tailRunLength = 2; + } + } else if (value == literals[numLiterals - 1] + delta) { + tailRunLength += 1; + } else { + delta = value - literals[numLiterals - 1]; + if (delta < MIN_DELTA || delta > MAX_DELTA) { + tailRunLength = 1; + } else { + tailRunLength = 2; + } + } + if (tailRunLength == MINIMUM_REPEAT) { + if (numLiterals + 1 == MINIMUM_REPEAT) { + repeat = true; + numLiterals += 1; + } else { + numLiterals -= static_cast<int>(MINIMUM_REPEAT - 1); + int64_t base = literals[numLiterals]; + writeValues(); + literals[0] = base; + repeat = true; + numLiterals = MINIMUM_REPEAT; + } + } else { + literals[numLiterals++] = value; + if (numLiterals == MAX_LITERAL_SIZE) { + writeValues(); + } + } + } +} + +signed char RleDecoderV1::readByte() { + if (bufferStart == bufferEnd) { + int bufferLength; + const void* bufferPointer; + if (!inputStream->Next(&bufferPointer, &bufferLength)) { + throw ParseError("bad read in readByte"); + } + bufferStart = static_cast<const char*>(bufferPointer); + bufferEnd = bufferStart + bufferLength; + } + return *(bufferStart++); +} + +uint64_t 
RleDecoderV1::readLong() { + uint64_t result = 0; + int64_t offset = 0; + signed char ch = readByte(); + if (ch >= 0) { + result = static_cast<uint64_t>(ch); + } else { + result = static_cast<uint64_t>(ch) & BASE_128_MASK; + while ((ch = readByte()) < 0) { + offset += 7; + result |= (static_cast<uint64_t>(ch) & BASE_128_MASK) << offset; + } + result |= static_cast<uint64_t>(ch) << (offset + 7); + } + return result; +} + +void RleDecoderV1::skipLongs(uint64_t numValues) { + while (numValues > 0) { + if (readByte() >= 0) { + --numValues; + } + } +} + +void RleDecoderV1::readHeader() { + signed char ch = readByte(); + if (ch < 0) { + remainingValues = static_cast<uint64_t>(-ch); + repeating = false; + } else { + remainingValues = static_cast<uint64_t>(ch) + MINIMUM_REPEAT; + repeating = true; + delta = readByte(); + value = isSigned + ? unZigZag(readLong()) + : static_cast<int64_t>(readLong()); + } +} + +RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input, + bool hasSigned) + : inputStream(std::move(input)), + isSigned(hasSigned), + remainingValues(0), + value(0), + bufferStart(nullptr), + bufferEnd(bufferStart), + delta(0), + repeating(false) { +} + +void RleDecoderV1::seek(PositionProvider& location) { + // move the input stream + inputStream->seek(location); + // force a re-read from the stream + bufferEnd = bufferStart; + // read a new header + readHeader(); + // skip ahead the given number of records + skip(location.next()); +} + +void RleDecoderV1::skip(uint64_t numValues) { + while (numValues > 0) { + if (remainingValues == 0) { + readHeader(); + } + uint64_t count = std::min(numValues, remainingValues); + remainingValues -= count; + numValues -= count; + if (repeating) { + value += delta * static_cast<int64_t>(count); + } else { + skipLongs(count); + } + } +} + +void RleDecoderV1::next(int64_t* const data, + const uint64_t numValues, + const char* const notNull) { + uint64_t position = 0; + // skipNulls() + if (notNull) { + // Skip over null values. + while (position < numValues && !notNull[position]) { + ++position; + } + } + while (position < numValues) { + // If we are out of values, read more. + if (remainingValues == 0) { + readHeader(); + } + // How many do we read out of this block? + uint64_t count = std::min(numValues - position, remainingValues); + uint64_t consumed = 0; + if (repeating) { + if (notNull) { + for (uint64_t i = 0; i < count; ++i) { + if (notNull[position + i]) { + data[position + i] = value + static_cast<int64_t>(consumed) * delta; + consumed += 1; + } + } + } else { + for (uint64_t i = 0; i < count; ++i) { + data[position + i] = value + static_cast<int64_t>(i) * delta; + } + consumed = count; + } + value += static_cast<int64_t>(consumed) * delta; + } else { + if (notNull) { + for (uint64_t i = 0 ; i < count; ++i) { + if (notNull[position + i]) { + data[position + i] = isSigned + ? unZigZag(readLong()) + : static_cast<int64_t>(readLong()); + ++consumed; + } + } + } else { + if (isSigned) { + for (uint64_t i = 0; i < count; ++i) { + data[position + i] = unZigZag(readLong()); + } + } else { + for (uint64_t i = 0; i < count; ++i) { + data[position + i] = static_cast<int64_t>(readLong()); + } + } + consumed = count; + } + } + remainingValues -= consumed; + position += count; + + // skipNulls() + if (notNull) { + // Skip over null values. 
+ while (position < numValues && !notNull[position]) { + ++position; + } + } + } +} + +} // namespace orc diff --git a/contrib/libs/apache/orc/c++/src/RLEv1.hh b/contrib/libs/apache/orc/c++/src/RLEv1.hh index eb0cf1d8c2..8e31d70873 100644 --- a/contrib/libs/apache/orc/c++/src/RLEv1.hh +++ b/contrib/libs/apache/orc/c++/src/RLEv1.hh @@ -1,91 +1,91 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -#ifndef ORC_RLEV1_HH -#define ORC_RLEV1_HH - -#include "Adaptor.hh" -#include "RLE.hh" - -#include <memory> - -namespace orc { - -class RleEncoderV1 : public RleEncoder { -public: - RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream, - bool hasSigned); - ~RleEncoderV1() override ; - - /** - * Flushing underlying BufferedOutputStream - */ - uint64_t flush() override; - - void write(int64_t val) override; - -private: - int64_t delta; - bool repeat; - uint64_t tailRunLength; - - void writeValues(); -}; - -class RleDecoderV1 : public RleDecoder { -public: - RleDecoderV1(std::unique_ptr<SeekableInputStream> input, - bool isSigned); - - /** - * Seek to a particular spot. - */ - void seek(PositionProvider&) override; - - /** - * Seek over a given number of values. - */ - void skip(uint64_t numValues) override; - - /** - * Read a number of values into the batch. - */ - void next(int64_t* data, uint64_t numValues, - const char* notNull) override; - -private: - inline signed char readByte(); - - inline void readHeader(); - - inline uint64_t readLong(); - - inline void skipLongs(uint64_t numValues); - - const std::unique_ptr<SeekableInputStream> inputStream; - const bool isSigned; - uint64_t remainingValues; - int64_t value; - const char *bufferStart; - const char *bufferEnd; - int64_t delta; - bool repeating; -}; -} // namespace orc - -#endif // ORC_RLEV1_HH +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#ifndef ORC_RLEV1_HH +#define ORC_RLEV1_HH + +#include "Adaptor.hh" +#include "RLE.hh" + +#include <memory> + +namespace orc { + +class RleEncoderV1 : public RleEncoder { +public: + RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream, + bool hasSigned); + ~RleEncoderV1() override ; + + /** + * Flushing underlying BufferedOutputStream + */ + uint64_t flush() override; + + void write(int64_t val) override; + +private: + int64_t delta; + bool repeat; + uint64_t tailRunLength; + + void writeValues(); +}; + +class RleDecoderV1 : public RleDecoder { +public: + RleDecoderV1(std::unique_ptr<SeekableInputStream> input, + bool isSigned); + + /** + * Seek to a particular spot. + */ + void seek(PositionProvider&) override; + + /** + * Seek over a given number of values. + */ + void skip(uint64_t numValues) override; + + /** + * Read a number of values into the batch. + */ + void next(int64_t* data, uint64_t numValues, + const char* notNull) override; + +private: + inline signed char readByte(); + + inline void readHeader(); + + inline uint64_t readLong(); + + inline void skipLongs(uint64_t numValues); + + const std::unique_ptr<SeekableInputStream> inputStream; + const bool isSigned; + uint64_t remainingValues; + int64_t value; + const char *bufferStart; + const char *bufferEnd; + int64_t delta; + bool repeating; +}; +} // namespace orc + +#endif // ORC_RLEV1_HH diff --git a/contrib/libs/apache/orc/c++/src/RLEv2.hh b/contrib/libs/apache/orc/c++/src/RLEv2.hh index 5c740dfd27..f85dabd9e6 100644 --- a/contrib/libs/apache/orc/c++/src/RLEv2.hh +++ b/contrib/libs/apache/orc/c++/src/RLEv2.hh @@ -1,251 +1,251 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#ifndef ORC_RLEV2_HH -#define ORC_RLEV2_HH - -#include "Adaptor.hh" -#include "orc/Exceptions.hh" -#include "RLE.hh" - -#include <vector> - -#define MIN_REPEAT 3 -#define HIST_LEN 32 -namespace orc { - -struct FixedBitSizes { - enum FBS { - ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE, - THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN, - TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX, - TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR, SIZE - }; -}; - -enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 }; - -struct EncodingOption { - EncodingType encoding; - int64_t fixedDelta; - int64_t gapVsPatchListCount; - int64_t zigzagLiteralsCount; - int64_t baseRedLiteralsCount; - int64_t adjDeltasCount; - uint32_t zzBits90p; - uint32_t zzBits100p; - uint32_t brBits95p; - uint32_t brBits100p; - uint32_t bitsDeltaMax; - uint32_t patchWidth; - uint32_t patchGapWidth; - uint32_t patchLength; - int64_t min; - bool isFixedDelta; -}; - -class RleEncoderV2 : public RleEncoder { -public: - RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned, bool alignBitPacking = true); - - ~RleEncoderV2() override { - delete [] literals; - delete [] gapVsPatchList; - delete [] zigzagLiterals; - delete [] baseRedLiterals; - delete [] adjDeltas; - } - /** - * Flushing underlying BufferedOutputStream - */ - uint64_t flush() override; - - void write(int64_t val) override; - -private: - - const bool alignedBitPacking; - uint32_t fixedRunLength; - uint32_t variableRunLength; - int64_t prevDelta; - int32_t histgram[HIST_LEN]; - - // The four list below should actually belong to EncodingOption since it only holds temporal values in write(int64_t val), - // it is move here for performance consideration. - int64_t* gapVsPatchList; - int64_t* zigzagLiterals; - int64_t* baseRedLiterals; - int64_t* adjDeltas; - - uint32_t getOpCode(EncodingType encoding); - void determineEncoding(EncodingOption& option); - void computeZigZagLiterals(EncodingOption& option); - void preparePatchedBlob(EncodingOption& option); - - void writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize); - void initializeLiterals(int64_t val); - void writeValues(EncodingOption& option); - void writeShortRepeatValues(EncodingOption& option); - void writeDirectValues(EncodingOption& option); - void writePatchedBasedValues(EncodingOption& option); - void writeDeltaValues(EncodingOption& option); - uint32_t percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist = false); -}; - -class RleDecoderV2 : public RleDecoder { -public: - RleDecoderV2(std::unique_ptr<SeekableInputStream> input, - bool isSigned, MemoryPool& pool); - - /** - * Seek to a particular spot. - */ - void seek(PositionProvider&) override; - - /** - * Seek over a given number of values. - */ - void skip(uint64_t numValues) override; - - /** - * Read a number of values into the batch. - */ - void next(int64_t* data, uint64_t numValues, - const char* notNull) override; - -private: - - // Used by PATCHED_BASE - void adjustGapAndPatch() { - curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >> - patchBitSize; - curPatch = unpackedPatch[patchIdx] & patchMask; - actualGap = 0; - - // special case: gap is >255 then patch value will be 0. 
- // if gap is <=255 then patch value cannot be 0 - while (curGap == 255 && curPatch == 0) { - actualGap += 255; - ++patchIdx; - curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >> - patchBitSize; - curPatch = unpackedPatch[patchIdx] & patchMask; - } - // add the left over gap - actualGap += curGap; - } - - void resetReadLongs() { - bitsLeft = 0; - curByte = 0; - } - - void resetRun() { - resetReadLongs(); - bitSize = 0; - } - - unsigned char readByte() { - if (bufferStart == bufferEnd) { - int bufferLength; - const void* bufferPointer; - if (!inputStream->Next(&bufferPointer, &bufferLength)) { - throw ParseError("bad read in RleDecoderV2::readByte"); - } - bufferStart = static_cast<const char*>(bufferPointer); - bufferEnd = bufferStart + bufferLength; - } - - unsigned char result = static_cast<unsigned char>(*bufferStart++); - return result; -} - - int64_t readLongBE(uint64_t bsz); - int64_t readVslong(); - uint64_t readVulong(); - uint64_t readLongs(int64_t *data, uint64_t offset, uint64_t len, - uint64_t fb, const char* notNull = nullptr) { - uint64_t ret = 0; - - // TODO: unroll to improve performance - for(uint64_t i = offset; i < (offset + len); i++) { - // skip null positions - if (notNull && !notNull[i]) { - continue; - } - uint64_t result = 0; - uint64_t bitsLeftToRead = fb; - while (bitsLeftToRead > bitsLeft) { - result <<= bitsLeft; - result |= curByte & ((1 << bitsLeft) - 1); - bitsLeftToRead -= bitsLeft; - curByte = readByte(); - bitsLeft = 8; - } - - // handle the left over bits - if (bitsLeftToRead > 0) { - result <<= bitsLeftToRead; - bitsLeft -= static_cast<uint32_t>(bitsLeftToRead); - result |= (curByte >> bitsLeft) & ((1 << bitsLeftToRead) - 1); - } - data[i] = static_cast<int64_t>(result); - ++ret; - } - - return ret; -} - - uint64_t nextShortRepeats(int64_t* data, uint64_t offset, uint64_t numValues, - const char* notNull); - uint64_t nextDirect(int64_t* data, uint64_t offset, uint64_t numValues, - const char* notNull); - uint64_t nextPatched(int64_t* data, uint64_t offset, uint64_t numValues, - const char* notNull); - uint64_t nextDelta(int64_t* data, uint64_t offset, uint64_t numValues, - const char* notNull); - - const std::unique_ptr<SeekableInputStream> inputStream; - const bool isSigned; - - unsigned char firstByte; - uint64_t runLength; - uint64_t runRead; - const char *bufferStart; - const char *bufferEnd; - int64_t deltaBase; // Used by DELTA - uint64_t byteSize; // Used by SHORT_REPEAT and PATCHED_BASE - int64_t firstValue; // Used by SHORT_REPEAT and DELTA - int64_t prevValue; // Used by DELTA - uint32_t bitSize; // Used by DIRECT, PATCHED_BASE and DELTA - uint32_t bitsLeft; // Used by anything that uses readLongs - uint32_t curByte; // Used by anything that uses readLongs - uint32_t patchBitSize; // Used by PATCHED_BASE - uint64_t unpackedIdx; // Used by PATCHED_BASE - uint64_t patchIdx; // Used by PATCHED_BASE - int64_t base; // Used by PATCHED_BASE - uint64_t curGap; // Used by PATCHED_BASE - int64_t curPatch; // Used by PATCHED_BASE - int64_t patchMask; // Used by PATCHED_BASE - int64_t actualGap; // Used by PATCHED_BASE - DataBuffer<int64_t> unpacked; // Used by PATCHED_BASE - DataBuffer<int64_t> unpackedPatch; // Used by PATCHED_BASE -}; -} // namespace orc - -#endif // ORC_RLEV2_HH +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#ifndef ORC_RLEV2_HH +#define ORC_RLEV2_HH + +#include "Adaptor.hh" +#include "orc/Exceptions.hh" +#include "RLE.hh" + +#include <vector> + +#define MIN_REPEAT 3 +#define HIST_LEN 32 +namespace orc { + +struct FixedBitSizes { + enum FBS { + ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE, + THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN, + TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX, + TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR, SIZE + }; +}; + +enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 }; + +struct EncodingOption { + EncodingType encoding; + int64_t fixedDelta; + int64_t gapVsPatchListCount; + int64_t zigzagLiteralsCount; + int64_t baseRedLiteralsCount; + int64_t adjDeltasCount; + uint32_t zzBits90p; + uint32_t zzBits100p; + uint32_t brBits95p; + uint32_t brBits100p; + uint32_t bitsDeltaMax; + uint32_t patchWidth; + uint32_t patchGapWidth; + uint32_t patchLength; + int64_t min; + bool isFixedDelta; +}; + +class RleEncoderV2 : public RleEncoder { +public: + RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned, bool alignBitPacking = true); + + ~RleEncoderV2() override { + delete [] literals; + delete [] gapVsPatchList; + delete [] zigzagLiterals; + delete [] baseRedLiterals; + delete [] adjDeltas; + } + /** + * Flushing underlying BufferedOutputStream + */ + uint64_t flush() override; + + void write(int64_t val) override; + +private: + + const bool alignedBitPacking; + uint32_t fixedRunLength; + uint32_t variableRunLength; + int64_t prevDelta; + int32_t histgram[HIST_LEN]; + + // The four list below should actually belong to EncodingOption since it only holds temporal values in write(int64_t val), + // it is move here for performance consideration. + int64_t* gapVsPatchList; + int64_t* zigzagLiterals; + int64_t* baseRedLiterals; + int64_t* adjDeltas; + + uint32_t getOpCode(EncodingType encoding); + void determineEncoding(EncodingOption& option); + void computeZigZagLiterals(EncodingOption& option); + void preparePatchedBlob(EncodingOption& option); + + void writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize); + void initializeLiterals(int64_t val); + void writeValues(EncodingOption& option); + void writeShortRepeatValues(EncodingOption& option); + void writeDirectValues(EncodingOption& option); + void writePatchedBasedValues(EncodingOption& option); + void writeDeltaValues(EncodingOption& option); + uint32_t percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist = false); +}; + +class RleDecoderV2 : public RleDecoder { +public: + RleDecoderV2(std::unique_ptr<SeekableInputStream> input, + bool isSigned, MemoryPool& pool); + + /** + * Seek to a particular spot. + */ + void seek(PositionProvider&) override; + + /** + * Seek over a given number of values. 
+ */ + void skip(uint64_t numValues) override; + + /** + * Read a number of values into the batch. + */ + void next(int64_t* data, uint64_t numValues, + const char* notNull) override; + +private: + + // Used by PATCHED_BASE + void adjustGapAndPatch() { + curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >> + patchBitSize; + curPatch = unpackedPatch[patchIdx] & patchMask; + actualGap = 0; + + // special case: gap is >255 then patch value will be 0. + // if gap is <=255 then patch value cannot be 0 + while (curGap == 255 && curPatch == 0) { + actualGap += 255; + ++patchIdx; + curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >> + patchBitSize; + curPatch = unpackedPatch[patchIdx] & patchMask; + } + // add the left over gap + actualGap += curGap; + } + + void resetReadLongs() { + bitsLeft = 0; + curByte = 0; + } + + void resetRun() { + resetReadLongs(); + bitSize = 0; + } + + unsigned char readByte() { + if (bufferStart == bufferEnd) { + int bufferLength; + const void* bufferPointer; + if (!inputStream->Next(&bufferPointer, &bufferLength)) { + throw ParseError("bad read in RleDecoderV2::readByte"); + } + bufferStart = static_cast<const char*>(bufferPointer); + bufferEnd = bufferStart + bufferLength; + } + + unsigned char result = static_cast<unsigned char>(*bufferStart++); + return result; +} + + int64_t readLongBE(uint64_t bsz); + int64_t readVslong(); + uint64_t readVulong(); + uint64_t readLongs(int64_t *data, uint64_t offset, uint64_t len, + uint64_t fb, const char* notNull = nullptr) { + uint64_t ret = 0; + + // TODO: unroll to improve performance + for(uint64_t i = offset; i < (offset + len); i++) { + // skip null positions + if (notNull && !notNull[i]) { + continue; + } + uint64_t result = 0; + uint64_t bitsLeftToRead = fb; + while (bitsLeftToRead > bitsLeft) { + result <<= bitsLeft; + result |= curByte & ((1 << bitsLeft) - 1); + bitsLeftToRead -= bitsLeft; + curByte = readByte(); + bitsLeft = 8; + } + + // handle the left over bits + if (bitsLeftToRead > 0) { + result <<= bitsLeftToRead; + bitsLeft -= static_cast<uint32_t>(bitsLeftToRead); + result |= (curByte >> bitsLeft) & ((1 << bitsLeftToRead) - 1); + } + data[i] = static_cast<int64_t>(result); + ++ret; + } + + return ret; +} + + uint64_t nextShortRepeats(int64_t* data, uint64_t offset, uint64_t numValues, + const char* notNull); + uint64_t nextDirect(int64_t* data, uint64_t offset, uint64_t numValues, + const char* notNull); + uint64_t nextPatched(int64_t* data, uint64_t offset, uint64_t numValues, + const char* notNull); + uint64_t nextDelta(int64_t* data, uint64_t offset, uint64_t numValues, + const char* notNull); + + const std::unique_ptr<SeekableInputStream> inputStream; + const bool isSigned; + + unsigned char firstByte; + uint64_t runLength; + uint64_t runRead; + const char *bufferStart; + const char *bufferEnd; + int64_t deltaBase; // Used by DELTA + uint64_t byteSize; // Used by SHORT_REPEAT and PATCHED_BASE + int64_t firstValue; // Used by SHORT_REPEAT and DELTA + int64_t prevValue; // Used by DELTA + uint32_t bitSize; // Used by DIRECT, PATCHED_BASE and DELTA + uint32_t bitsLeft; // Used by anything that uses readLongs + uint32_t curByte; // Used by anything that uses readLongs + uint32_t patchBitSize; // Used by PATCHED_BASE + uint64_t unpackedIdx; // Used by PATCHED_BASE + uint64_t patchIdx; // Used by PATCHED_BASE + int64_t base; // Used by PATCHED_BASE + uint64_t curGap; // Used by PATCHED_BASE + int64_t curPatch; // Used by PATCHED_BASE + int64_t patchMask; // Used by PATCHED_BASE + int64_t actualGap; 
// Used by PATCHED_BASE + DataBuffer<int64_t> unpacked; // Used by PATCHED_BASE + DataBuffer<int64_t> unpackedPatch; // Used by PATCHED_BASE +}; +} // namespace orc + +#endif // ORC_RLEV2_HH diff --git a/contrib/libs/apache/orc/c++/src/Reader.cc b/contrib/libs/apache/orc/c++/src/Reader.cc index a633567a9c..f35106ee44 100644 --- a/contrib/libs/apache/orc/c++/src/Reader.cc +++ b/contrib/libs/apache/orc/c++/src/Reader.cc @@ -1,513 +1,513 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "BloomFilter.hh" -#include "Options.hh" -#include "Reader.hh" -#include "Statistics.hh" -#include "StripeStream.hh" - -#include "wrap/coded-stream-wrapper.h" - -#include <algorithm> -#include <iostream> -#include <memory> -#include <sstream> -#include <string> -#include <vector> -#include <iterator> -#include <set> - -namespace orc { - - const WriterVersionImpl &WriterVersionImpl::VERSION_HIVE_8732() { - static const WriterVersionImpl version(WriterVersion_HIVE_8732); - return version; - } - - uint64_t getCompressionBlockSize(const proto::PostScript& ps) { - if (ps.has_compressionblocksize()) { - return ps.compressionblocksize(); - } else { - return 256 * 1024; - } - } - - CompressionKind convertCompressionKind(const proto::PostScript& ps) { - if (ps.has_compression()) { - return static_cast<CompressionKind>(ps.compression()); - } else { - throw ParseError("Unknown compression type"); - } - } - - std::string ColumnSelector::toDotColumnPath() { - if (columns.empty()) { - return std::string(); - } - std::ostringstream columnStream; - std::copy(columns.begin(), columns.end(), - std::ostream_iterator<std::string>(columnStream, ".")); - std::string columnPath = columnStream.str(); - return columnPath.substr(0, columnPath.length() - 1); - } - - - void ColumnSelector::selectChildren(std::vector<bool>& selectedColumns, const Type& type) { - size_t id = static_cast<size_t>(type.getColumnId()); - if (!selectedColumns[id]) { - selectedColumns[id] = true; - for(size_t c = id; c <= type.getMaximumColumnId(); ++c){ - selectedColumns[c] = true; - } - } - } - - /** - * Recurses over a type tree and selects the parents of every selected type. - * @return true if any child was selected. 
- */ - bool ColumnSelector::selectParents(std::vector<bool>& selectedColumns, const Type& type) { - size_t id = static_cast<size_t>(type.getColumnId()); - bool result = selectedColumns[id]; - for(uint64_t c=0; c < type.getSubtypeCount(); ++c) { - result |= selectParents(selectedColumns, *type.getSubtype(c)); - } - selectedColumns[id] = result; - return result; - } - - /** - * Recurses over a type tree and build two maps - * map<TypeName, TypeId>, map<TypeId, Type> - */ - void ColumnSelector::buildTypeNameIdMap(const Type* type) { - // map<type_id, Type*> - idTypeMap[type->getColumnId()] = type; - - if (STRUCT == type->getKind()) { - for (size_t i = 0; i < type->getSubtypeCount(); ++i) { - const std::string& fieldName = type->getFieldName(i); - columns.push_back(fieldName); - nameIdMap[toDotColumnPath()] = type->getSubtype(i)->getColumnId(); - buildTypeNameIdMap(type->getSubtype(i)); - columns.pop_back(); - } - } else { - // other non-primitive type - for (size_t j = 0; j < type->getSubtypeCount(); ++j) { - buildTypeNameIdMap(type->getSubtype(j)); - } - } - } - - void ColumnSelector::updateSelected(std::vector<bool>& selectedColumns, - const RowReaderOptions& options) { - selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); - if (contents->schema->getKind() == STRUCT && options.getIndexesSet()) { - for(std::list<uint64_t>::const_iterator field = options.getInclude().begin(); - field != options.getInclude().end(); ++field) { - updateSelectedByFieldId(selectedColumns, *field); - } - } else if (contents->schema->getKind() == STRUCT && options.getNamesSet()) { - for(std::list<std::string>::const_iterator field = options.getIncludeNames().begin(); - field != options.getIncludeNames().end(); ++field) { - updateSelectedByName(selectedColumns, *field); - } - } else if (options.getTypeIdsSet()) { - for(std::list<uint64_t>::const_iterator typeId = options.getInclude().begin(); - typeId != options.getInclude().end(); ++typeId) { - updateSelectedByTypeId(selectedColumns, *typeId); - } - } else { - // default is to select all columns - std::fill(selectedColumns.begin(), selectedColumns.end(), true); - } - selectParents(selectedColumns, *contents->schema.get()); - selectedColumns[0] = true; // column 0 is selected by default - } - - void ColumnSelector::updateSelectedByFieldId(std::vector<bool>& selectedColumns, - uint64_t fieldId) { - if (fieldId < contents->schema->getSubtypeCount()) { - selectChildren(selectedColumns, *contents->schema->getSubtype(fieldId)); - } else { - std::stringstream buffer; - buffer << "Invalid column selected " << fieldId << " out of " - << contents->schema->getSubtypeCount(); - throw ParseError(buffer.str()); - } - } - - void ColumnSelector::updateSelectedByTypeId(std::vector<bool>& selectedColumns, uint64_t typeId) { - if (typeId < selectedColumns.size()) { - const Type& type = *idTypeMap[typeId]; - selectChildren(selectedColumns, type); - } else { - std::stringstream buffer; - buffer << "Invalid type id selected " << typeId << " out of " - << selectedColumns.size(); - throw ParseError(buffer.str()); - } - } - - void ColumnSelector::updateSelectedByName(std::vector<bool>& selectedColumns, - const std::string& fieldName) { - std::map<std::string, uint64_t>::const_iterator ite = nameIdMap.find(fieldName); - if (ite != nameIdMap.end()) { - updateSelectedByTypeId(selectedColumns, ite->second); - } else { - throw ParseError("Invalid column selected " + fieldName); - } - } - - ColumnSelector::ColumnSelector(const FileContents* _contents): 
contents(_contents) { - buildTypeNameIdMap(contents->schema.get()); - } - - RowReaderImpl::RowReaderImpl(std::shared_ptr<FileContents> _contents, - const RowReaderOptions& opts - ): localTimezone(getLocalTimezone()), - contents(_contents), - throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()), - forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()), - footer(contents->footer.get()), - firstRowOfStripe(*contents->pool, 0), - enableEncodedBlock(opts.getEnableLazyDecoding()) { - uint64_t numberOfStripes; - numberOfStripes = static_cast<uint64_t>(footer->stripes_size()); - currentStripe = numberOfStripes; - lastStripe = 0; - currentRowInStripe = 0; - rowsInCurrentStripe = 0; - uint64_t rowTotal = 0; - - firstRowOfStripe.resize(numberOfStripes); - for(size_t i=0; i < numberOfStripes; ++i) { - firstRowOfStripe[i] = rowTotal; - proto::StripeInformation stripeInfo = - footer->stripes(static_cast<int>(i)); - rowTotal += stripeInfo.numberofrows(); - bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() && - stripeInfo.offset() < opts.getOffset() + opts.getLength(); - if (isStripeInRange) { - if (i < currentStripe) { - currentStripe = i; - } - if (i >= lastStripe) { - lastStripe = i + 1; - } - } - } - firstStripe = currentStripe; - - if (currentStripe == 0) { - previousRow = (std::numeric_limits<uint64_t>::max)(); - } else if (currentStripe == numberOfStripes) { - previousRow = footer->numberofrows(); - } else { - previousRow = firstRowOfStripe[firstStripe]-1; - } - - ColumnSelector column_selector(contents.get()); - column_selector.updateSelected(selectedColumns, opts); - } - - CompressionKind RowReaderImpl::getCompression() const { - return contents->compression; - } - - uint64_t RowReaderImpl::getCompressionSize() const { - return contents->blockSize; - } - - const std::vector<bool> RowReaderImpl::getSelectedColumns() const { - return selectedColumns; - } - - const Type& RowReaderImpl::getSelectedType() const { - if (selectedSchema.get() == nullptr) { - selectedSchema = buildSelectedType(contents->schema.get(), - selectedColumns); - } - return *(selectedSchema.get()); - } - - uint64_t RowReaderImpl::getRowNumber() const { - return previousRow; - } - - void RowReaderImpl::seekToRow(uint64_t rowNumber) { - // Empty file - if (lastStripe == 0) { - return; - } - - // If we are reading only a portion of the file - // (bounded by firstStripe and lastStripe), - // seeking before or after the portion of interest should return no data. - // Implement this by setting previousRow to the number of rows in the file. 
- - // seeking past lastStripe - uint64_t num_stripes = static_cast<uint64_t>(footer->stripes_size()); - if ( (lastStripe == num_stripes - && rowNumber >= footer->numberofrows()) || - (lastStripe < num_stripes - && rowNumber >= firstRowOfStripe[lastStripe]) ) { - currentStripe = num_stripes; - previousRow = footer->numberofrows(); - return; - } - - uint64_t seekToStripe = 0; - while (seekToStripe+1 < lastStripe && - firstRowOfStripe[seekToStripe+1] <= rowNumber) { - seekToStripe++; - } - - // seeking before the first stripe - if (seekToStripe < firstStripe) { - currentStripe = num_stripes; - previousRow = footer->numberofrows(); - return; - } - - currentStripe = seekToStripe; - currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe]; - previousRow = rowNumber; - startNextStripe(); - - uint64_t rowsToSkip = currentRowInStripe; - - if (footer->rowindexstride() > 0 && - currentStripeInfo.indexlength() > 0) { - uint32_t rowGroupId = - static_cast<uint32_t>(currentRowInStripe / footer->rowindexstride()); - rowsToSkip -= rowGroupId * footer->rowindexstride(); - - if (rowGroupId != 0) { - seekToRowGroup(rowGroupId); - } - } - - reader->skip(rowsToSkip); - } - - void RowReaderImpl::seekToRowGroup(uint32_t rowGroupEntryId) { - // reset all previous row indexes - rowIndexes.clear(); - - // obtain row indexes for selected columns - uint64_t offset = currentStripeInfo.offset(); - for (int i = 0; i < currentStripeFooter.streams_size(); ++i) { - const proto::Stream& pbStream = currentStripeFooter.streams(i); - uint64_t colId = pbStream.column(); - if (selectedColumns[colId] && pbStream.has_kind() - && pbStream.kind() == proto::Stream_Kind_ROW_INDEX) { - std::unique_ptr<SeekableInputStream> inStream = - createDecompressor(getCompression(), - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream - (contents->stream.get(), - offset, - pbStream.length(), - *contents->pool)), - getCompressionSize(), - *contents->pool); - - proto::RowIndex rowIndex; - if (!rowIndex.ParseFromZeroCopyStream(inStream.get())) { - throw ParseError("Failed to parse the row index"); - } - - rowIndexes[colId] = rowIndex; - } - offset += pbStream.length(); - } - - // store positions for selected columns - std::vector<std::list<uint64_t>> positions; - // store position providers for selected colimns - std::unordered_map<uint64_t, PositionProvider> positionProviders; - - for (auto rowIndex = rowIndexes.cbegin(); - rowIndex != rowIndexes.cend(); ++rowIndex) { - uint64_t colId = rowIndex->first; - const proto::RowIndexEntry& entry = - rowIndex->second.entry(static_cast<int32_t>(rowGroupEntryId)); - - // copy index positions for a specific column - positions.push_back({}); - auto& position = positions.back(); - for (int pos = 0; pos != entry.positions_size(); ++pos) { - position.push_back(entry.positions(pos)); - } - positionProviders.insert(std::make_pair(colId, PositionProvider(position))); - } - - reader->seekToRowGroup(positionProviders); - } - - const FileContents& RowReaderImpl::getFileContents() const { - return *contents; - } - - bool RowReaderImpl::getThrowOnHive11DecimalOverflow() const { - return throwOnHive11DecimalOverflow; - } - - int32_t RowReaderImpl::getForcedScaleOnHive11Decimal() const { - return forcedScaleOnHive11Decimal; - } - - proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, - const FileContents& contents) { - uint64_t stripeFooterStart = info.offset() + info.indexlength() + - info.datalength(); - uint64_t stripeFooterLength = info.footerlength(); - 
std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(contents.compression, - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream(contents.stream.get(), - stripeFooterStart, - stripeFooterLength, - *contents.pool)), - contents.blockSize, - *contents.pool); - proto::StripeFooter result; - if (!result.ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError(std::string("bad StripeFooter from ") + - pbStream->getName()); - } - return result; - } - - ReaderImpl::ReaderImpl(std::shared_ptr<FileContents> _contents, - const ReaderOptions& opts, - uint64_t _fileLength, - uint64_t _postscriptLength - ): contents(std::move(_contents)), - options(opts), - fileLength(_fileLength), - postscriptLength(_postscriptLength), - footer(contents->footer.get()) { - isMetadataLoaded = false; - checkOrcVersion(); - numberOfStripes = static_cast<uint64_t>(footer->stripes_size()); - contents->schema = REDUNDANT_MOVE(convertType(footer->types(0), *footer)); - contents->blockSize = getCompressionBlockSize(*contents->postscript); - contents->compression= convertCompressionKind(*contents->postscript); - } - - std::string ReaderImpl::getSerializedFileTail() const { - proto::FileTail tail; - proto::PostScript *mutable_ps = tail.mutable_postscript(); - mutable_ps->CopyFrom(*contents->postscript); - proto::Footer *mutableFooter = tail.mutable_footer(); - mutableFooter->CopyFrom(*footer); - tail.set_filelength(fileLength); - tail.set_postscriptlength(postscriptLength); - TString result; - if (!tail.SerializeToString(&result)) { - throw ParseError("Failed to serialize file tail"); - } - return result; - } - - const ReaderOptions& ReaderImpl::getReaderOptions() const { - return options; - } - - CompressionKind ReaderImpl::getCompression() const { - return contents->compression; - } - - uint64_t ReaderImpl::getCompressionSize() const { - return contents->blockSize; - } - - uint64_t ReaderImpl::getNumberOfStripes() const { - return numberOfStripes; - } - - uint64_t ReaderImpl::getNumberOfStripeStatistics() const { - if (!isMetadataLoaded) { - readMetadata(); - } - return metadata.get() == nullptr ? 0 : - static_cast<uint64_t>(metadata->stripestats_size()); - } - - std::unique_ptr<StripeInformation> - ReaderImpl::getStripe(uint64_t stripeIndex) const { - if (stripeIndex > getNumberOfStripes()) { - throw std::logic_error("stripe index out of range"); - } - proto::StripeInformation stripeInfo = - footer->stripes(static_cast<int>(stripeIndex)); - - return std::unique_ptr<StripeInformation> - (new StripeInformationImpl - (stripeInfo.offset(), - stripeInfo.indexlength(), - stripeInfo.datalength(), - stripeInfo.footerlength(), - stripeInfo.numberofrows(), - contents->stream.get(), - *contents->pool, - contents->compression, - contents->blockSize)); - } - - FileVersion ReaderImpl::getFormatVersion() const { - if (contents->postscript->version_size() != 2) { - return FileVersion::v_0_11(); - } - return FileVersion( - contents->postscript->version(0), - contents->postscript->version(1)); - } - - uint64_t ReaderImpl::getNumberOfRows() const { - return footer->numberofrows(); - } - - WriterId ReaderImpl::getWriterId() const { - if (footer->has_writer()) { - uint32_t id = footer->writer(); +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "BloomFilter.hh" +#include "Options.hh" +#include "Reader.hh" +#include "Statistics.hh" +#include "StripeStream.hh" + +#include "wrap/coded-stream-wrapper.h" + +#include <algorithm> +#include <iostream> +#include <memory> +#include <sstream> +#include <string> +#include <vector> +#include <iterator> +#include <set> + +namespace orc { + + const WriterVersionImpl &WriterVersionImpl::VERSION_HIVE_8732() { + static const WriterVersionImpl version(WriterVersion_HIVE_8732); + return version; + } + + uint64_t getCompressionBlockSize(const proto::PostScript& ps) { + if (ps.has_compressionblocksize()) { + return ps.compressionblocksize(); + } else { + return 256 * 1024; + } + } + + CompressionKind convertCompressionKind(const proto::PostScript& ps) { + if (ps.has_compression()) { + return static_cast<CompressionKind>(ps.compression()); + } else { + throw ParseError("Unknown compression type"); + } + } + + std::string ColumnSelector::toDotColumnPath() { + if (columns.empty()) { + return std::string(); + } + std::ostringstream columnStream; + std::copy(columns.begin(), columns.end(), + std::ostream_iterator<std::string>(columnStream, ".")); + std::string columnPath = columnStream.str(); + return columnPath.substr(0, columnPath.length() - 1); + } + + + void ColumnSelector::selectChildren(std::vector<bool>& selectedColumns, const Type& type) { + size_t id = static_cast<size_t>(type.getColumnId()); + if (!selectedColumns[id]) { + selectedColumns[id] = true; + for(size_t c = id; c <= type.getMaximumColumnId(); ++c){ + selectedColumns[c] = true; + } + } + } + + /** + * Recurses over a type tree and selects the parents of every selected type. + * @return true if any child was selected. 
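   * For example (editorial illustration, not in the upstream comment): with a
   * schema of struct<a:struct<b:int>,c:string> and only "a.b" requested by name
   * through RowReaderOptions, selectChildren marks the subtree of "a.b", and
   * this pass then walks back up and also marks "a" and the root struct.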
+ */ + bool ColumnSelector::selectParents(std::vector<bool>& selectedColumns, const Type& type) { + size_t id = static_cast<size_t>(type.getColumnId()); + bool result = selectedColumns[id]; + for(uint64_t c=0; c < type.getSubtypeCount(); ++c) { + result |= selectParents(selectedColumns, *type.getSubtype(c)); + } + selectedColumns[id] = result; + return result; + } + + /** + * Recurses over a type tree and build two maps + * map<TypeName, TypeId>, map<TypeId, Type> + */ + void ColumnSelector::buildTypeNameIdMap(const Type* type) { + // map<type_id, Type*> + idTypeMap[type->getColumnId()] = type; + + if (STRUCT == type->getKind()) { + for (size_t i = 0; i < type->getSubtypeCount(); ++i) { + const std::string& fieldName = type->getFieldName(i); + columns.push_back(fieldName); + nameIdMap[toDotColumnPath()] = type->getSubtype(i)->getColumnId(); + buildTypeNameIdMap(type->getSubtype(i)); + columns.pop_back(); + } + } else { + // other non-primitive type + for (size_t j = 0; j < type->getSubtypeCount(); ++j) { + buildTypeNameIdMap(type->getSubtype(j)); + } + } + } + + void ColumnSelector::updateSelected(std::vector<bool>& selectedColumns, + const RowReaderOptions& options) { + selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); + if (contents->schema->getKind() == STRUCT && options.getIndexesSet()) { + for(std::list<uint64_t>::const_iterator field = options.getInclude().begin(); + field != options.getInclude().end(); ++field) { + updateSelectedByFieldId(selectedColumns, *field); + } + } else if (contents->schema->getKind() == STRUCT && options.getNamesSet()) { + for(std::list<std::string>::const_iterator field = options.getIncludeNames().begin(); + field != options.getIncludeNames().end(); ++field) { + updateSelectedByName(selectedColumns, *field); + } + } else if (options.getTypeIdsSet()) { + for(std::list<uint64_t>::const_iterator typeId = options.getInclude().begin(); + typeId != options.getInclude().end(); ++typeId) { + updateSelectedByTypeId(selectedColumns, *typeId); + } + } else { + // default is to select all columns + std::fill(selectedColumns.begin(), selectedColumns.end(), true); + } + selectParents(selectedColumns, *contents->schema.get()); + selectedColumns[0] = true; // column 0 is selected by default + } + + void ColumnSelector::updateSelectedByFieldId(std::vector<bool>& selectedColumns, + uint64_t fieldId) { + if (fieldId < contents->schema->getSubtypeCount()) { + selectChildren(selectedColumns, *contents->schema->getSubtype(fieldId)); + } else { + std::stringstream buffer; + buffer << "Invalid column selected " << fieldId << " out of " + << contents->schema->getSubtypeCount(); + throw ParseError(buffer.str()); + } + } + + void ColumnSelector::updateSelectedByTypeId(std::vector<bool>& selectedColumns, uint64_t typeId) { + if (typeId < selectedColumns.size()) { + const Type& type = *idTypeMap[typeId]; + selectChildren(selectedColumns, type); + } else { + std::stringstream buffer; + buffer << "Invalid type id selected " << typeId << " out of " + << selectedColumns.size(); + throw ParseError(buffer.str()); + } + } + + void ColumnSelector::updateSelectedByName(std::vector<bool>& selectedColumns, + const std::string& fieldName) { + std::map<std::string, uint64_t>::const_iterator ite = nameIdMap.find(fieldName); + if (ite != nameIdMap.end()) { + updateSelectedByTypeId(selectedColumns, ite->second); + } else { + throw ParseError("Invalid column selected " + fieldName); + } + } + + ColumnSelector::ColumnSelector(const FileContents* _contents): 
contents(_contents) { + buildTypeNameIdMap(contents->schema.get()); + } + + RowReaderImpl::RowReaderImpl(std::shared_ptr<FileContents> _contents, + const RowReaderOptions& opts + ): localTimezone(getLocalTimezone()), + contents(_contents), + throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()), + forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()), + footer(contents->footer.get()), + firstRowOfStripe(*contents->pool, 0), + enableEncodedBlock(opts.getEnableLazyDecoding()) { + uint64_t numberOfStripes; + numberOfStripes = static_cast<uint64_t>(footer->stripes_size()); + currentStripe = numberOfStripes; + lastStripe = 0; + currentRowInStripe = 0; + rowsInCurrentStripe = 0; + uint64_t rowTotal = 0; + + firstRowOfStripe.resize(numberOfStripes); + for(size_t i=0; i < numberOfStripes; ++i) { + firstRowOfStripe[i] = rowTotal; + proto::StripeInformation stripeInfo = + footer->stripes(static_cast<int>(i)); + rowTotal += stripeInfo.numberofrows(); + bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() && + stripeInfo.offset() < opts.getOffset() + opts.getLength(); + if (isStripeInRange) { + if (i < currentStripe) { + currentStripe = i; + } + if (i >= lastStripe) { + lastStripe = i + 1; + } + } + } + firstStripe = currentStripe; + + if (currentStripe == 0) { + previousRow = (std::numeric_limits<uint64_t>::max)(); + } else if (currentStripe == numberOfStripes) { + previousRow = footer->numberofrows(); + } else { + previousRow = firstRowOfStripe[firstStripe]-1; + } + + ColumnSelector column_selector(contents.get()); + column_selector.updateSelected(selectedColumns, opts); + } + + CompressionKind RowReaderImpl::getCompression() const { + return contents->compression; + } + + uint64_t RowReaderImpl::getCompressionSize() const { + return contents->blockSize; + } + + const std::vector<bool> RowReaderImpl::getSelectedColumns() const { + return selectedColumns; + } + + const Type& RowReaderImpl::getSelectedType() const { + if (selectedSchema.get() == nullptr) { + selectedSchema = buildSelectedType(contents->schema.get(), + selectedColumns); + } + return *(selectedSchema.get()); + } + + uint64_t RowReaderImpl::getRowNumber() const { + return previousRow; + } + + void RowReaderImpl::seekToRow(uint64_t rowNumber) { + // Empty file + if (lastStripe == 0) { + return; + } + + // If we are reading only a portion of the file + // (bounded by firstStripe and lastStripe), + // seeking before or after the portion of interest should return no data. + // Implement this by setting previousRow to the number of rows in the file. 
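    // Editorial sketch of how this is reached from the public API (the file
    // name is hypothetical; the calls are the library's Reader/RowReader API):
    //   std::unique_ptr<orc::Reader> reader =
    //       orc::createReader(orc::readLocalFile("/path/to/file.orc"),
    //                         orc::ReaderOptions());
    //   std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
    //   rowReader->seekToRow(1000);  // out-of-range seeks leave nothing to read
    //   std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
    //   rowReader->next(*batch);     // next() starts returning rows from row 1000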
+ + // seeking past lastStripe + uint64_t num_stripes = static_cast<uint64_t>(footer->stripes_size()); + if ( (lastStripe == num_stripes + && rowNumber >= footer->numberofrows()) || + (lastStripe < num_stripes + && rowNumber >= firstRowOfStripe[lastStripe]) ) { + currentStripe = num_stripes; + previousRow = footer->numberofrows(); + return; + } + + uint64_t seekToStripe = 0; + while (seekToStripe+1 < lastStripe && + firstRowOfStripe[seekToStripe+1] <= rowNumber) { + seekToStripe++; + } + + // seeking before the first stripe + if (seekToStripe < firstStripe) { + currentStripe = num_stripes; + previousRow = footer->numberofrows(); + return; + } + + currentStripe = seekToStripe; + currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe]; + previousRow = rowNumber; + startNextStripe(); + + uint64_t rowsToSkip = currentRowInStripe; + + if (footer->rowindexstride() > 0 && + currentStripeInfo.indexlength() > 0) { + uint32_t rowGroupId = + static_cast<uint32_t>(currentRowInStripe / footer->rowindexstride()); + rowsToSkip -= rowGroupId * footer->rowindexstride(); + + if (rowGroupId != 0) { + seekToRowGroup(rowGroupId); + } + } + + reader->skip(rowsToSkip); + } + + void RowReaderImpl::seekToRowGroup(uint32_t rowGroupEntryId) { + // reset all previous row indexes + rowIndexes.clear(); + + // obtain row indexes for selected columns + uint64_t offset = currentStripeInfo.offset(); + for (int i = 0; i < currentStripeFooter.streams_size(); ++i) { + const proto::Stream& pbStream = currentStripeFooter.streams(i); + uint64_t colId = pbStream.column(); + if (selectedColumns[colId] && pbStream.has_kind() + && pbStream.kind() == proto::Stream_Kind_ROW_INDEX) { + std::unique_ptr<SeekableInputStream> inStream = + createDecompressor(getCompression(), + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream + (contents->stream.get(), + offset, + pbStream.length(), + *contents->pool)), + getCompressionSize(), + *contents->pool); + + proto::RowIndex rowIndex; + if (!rowIndex.ParseFromZeroCopyStream(inStream.get())) { + throw ParseError("Failed to parse the row index"); + } + + rowIndexes[colId] = rowIndex; + } + offset += pbStream.length(); + } + + // store positions for selected columns + std::vector<std::list<uint64_t>> positions; + // store position providers for selected colimns + std::unordered_map<uint64_t, PositionProvider> positionProviders; + + for (auto rowIndex = rowIndexes.cbegin(); + rowIndex != rowIndexes.cend(); ++rowIndex) { + uint64_t colId = rowIndex->first; + const proto::RowIndexEntry& entry = + rowIndex->second.entry(static_cast<int32_t>(rowGroupEntryId)); + + // copy index positions for a specific column + positions.push_back({}); + auto& position = positions.back(); + for (int pos = 0; pos != entry.positions_size(); ++pos) { + position.push_back(entry.positions(pos)); + } + positionProviders.insert(std::make_pair(colId, PositionProvider(position))); + } + + reader->seekToRowGroup(positionProviders); + } + + const FileContents& RowReaderImpl::getFileContents() const { + return *contents; + } + + bool RowReaderImpl::getThrowOnHive11DecimalOverflow() const { + return throwOnHive11DecimalOverflow; + } + + int32_t RowReaderImpl::getForcedScaleOnHive11Decimal() const { + return forcedScaleOnHive11Decimal; + } + + proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, + const FileContents& contents) { + uint64_t stripeFooterStart = info.offset() + info.indexlength() + + info.datalength(); + uint64_t stripeFooterLength = info.footerlength(); + 
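    // Editorial note: the stripe footer sits immediately after the stripe's
    // index and data sections. For example, a stripe with offset()=3,
    // indexlength()=1200 and datalength()=500000 has its footer starting at
    // byte 3 + 1200 + 500000 = 501203 and spanning footerlength() bytes.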
std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(contents.compression, + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream(contents.stream.get(), + stripeFooterStart, + stripeFooterLength, + *contents.pool)), + contents.blockSize, + *contents.pool); + proto::StripeFooter result; + if (!result.ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError(std::string("bad StripeFooter from ") + + pbStream->getName()); + } + return result; + } + + ReaderImpl::ReaderImpl(std::shared_ptr<FileContents> _contents, + const ReaderOptions& opts, + uint64_t _fileLength, + uint64_t _postscriptLength + ): contents(std::move(_contents)), + options(opts), + fileLength(_fileLength), + postscriptLength(_postscriptLength), + footer(contents->footer.get()) { + isMetadataLoaded = false; + checkOrcVersion(); + numberOfStripes = static_cast<uint64_t>(footer->stripes_size()); + contents->schema = REDUNDANT_MOVE(convertType(footer->types(0), *footer)); + contents->blockSize = getCompressionBlockSize(*contents->postscript); + contents->compression= convertCompressionKind(*contents->postscript); + } + + std::string ReaderImpl::getSerializedFileTail() const { + proto::FileTail tail; + proto::PostScript *mutable_ps = tail.mutable_postscript(); + mutable_ps->CopyFrom(*contents->postscript); + proto::Footer *mutableFooter = tail.mutable_footer(); + mutableFooter->CopyFrom(*footer); + tail.set_filelength(fileLength); + tail.set_postscriptlength(postscriptLength); + TString result; + if (!tail.SerializeToString(&result)) { + throw ParseError("Failed to serialize file tail"); + } + return result; + } + + const ReaderOptions& ReaderImpl::getReaderOptions() const { + return options; + } + + CompressionKind ReaderImpl::getCompression() const { + return contents->compression; + } + + uint64_t ReaderImpl::getCompressionSize() const { + return contents->blockSize; + } + + uint64_t ReaderImpl::getNumberOfStripes() const { + return numberOfStripes; + } + + uint64_t ReaderImpl::getNumberOfStripeStatistics() const { + if (!isMetadataLoaded) { + readMetadata(); + } + return metadata.get() == nullptr ? 
0 : + static_cast<uint64_t>(metadata->stripestats_size()); + } + + std::unique_ptr<StripeInformation> + ReaderImpl::getStripe(uint64_t stripeIndex) const { + if (stripeIndex > getNumberOfStripes()) { + throw std::logic_error("stripe index out of range"); + } + proto::StripeInformation stripeInfo = + footer->stripes(static_cast<int>(stripeIndex)); + + return std::unique_ptr<StripeInformation> + (new StripeInformationImpl + (stripeInfo.offset(), + stripeInfo.indexlength(), + stripeInfo.datalength(), + stripeInfo.footerlength(), + stripeInfo.numberofrows(), + contents->stream.get(), + *contents->pool, + contents->compression, + contents->blockSize)); + } + + FileVersion ReaderImpl::getFormatVersion() const { + if (contents->postscript->version_size() != 2) { + return FileVersion::v_0_11(); + } + return FileVersion( + contents->postscript->version(0), + contents->postscript->version(1)); + } + + uint64_t ReaderImpl::getNumberOfRows() const { + return footer->numberofrows(); + } + + WriterId ReaderImpl::getWriterId() const { + if (footer->has_writer()) { + uint32_t id = footer->writer(); if (id > WriterId::TRINO_WRITER) { - return WriterId::UNKNOWN_WRITER; - } else { - return static_cast<WriterId>(id); - } - } - return WriterId::ORC_JAVA_WRITER; - } - - uint32_t ReaderImpl::getWriterIdValue() const { - if (footer->has_writer()) { - return footer->writer(); - } else { - return WriterId::ORC_JAVA_WRITER; - } - } - + return WriterId::UNKNOWN_WRITER; + } else { + return static_cast<WriterId>(id); + } + } + return WriterId::ORC_JAVA_WRITER; + } + + uint32_t ReaderImpl::getWriterIdValue() const { + if (footer->has_writer()) { + return footer->writer(); + } else { + return WriterId::ORC_JAVA_WRITER; + } + } + std::string ReaderImpl::getSoftwareVersion() const { std::ostringstream buffer; buffer << writerIdToString(getWriterIdValue()); @@ -517,704 +517,704 @@ namespace orc { return buffer.str(); } - WriterVersion ReaderImpl::getWriterVersion() const { - if (!contents->postscript->has_writerversion()) { - return WriterVersion_ORIGINAL; - } - return static_cast<WriterVersion>(contents->postscript->writerversion()); - } - - uint64_t ReaderImpl::getContentLength() const { - return footer->contentlength(); - } - - uint64_t ReaderImpl::getStripeStatisticsLength() const { - return contents->postscript->metadatalength(); - } - - uint64_t ReaderImpl::getFileFooterLength() const { - return contents->postscript->footerlength(); - } - - uint64_t ReaderImpl::getFilePostscriptLength() const { - return postscriptLength; - } - - uint64_t ReaderImpl::getFileLength() const { - return fileLength; - } - - uint64_t ReaderImpl::getRowIndexStride() const { - return footer->rowindexstride(); - } - - const std::string& ReaderImpl::getStreamName() const { - return contents->stream->getName(); - } - - std::list<std::string> ReaderImpl::getMetadataKeys() const { - std::list<std::string> result; - for(int i=0; i < footer->metadata_size(); ++i) { - result.push_back(footer->metadata(i).name()); - } - return result; - } - - std::string ReaderImpl::getMetadataValue(const std::string& key) const { - for(int i=0; i < footer->metadata_size(); ++i) { - if (footer->metadata(i).name() == TString(key)) { - return footer->metadata(i).value(); - } - } - throw std::range_error("key not found"); - } - - void ReaderImpl::getRowIndexStatistics(const proto::StripeInformation& stripeInfo, - uint64_t stripeIndex, const proto::StripeFooter& currentStripeFooter, - std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const { - int num_streams 
= currentStripeFooter.streams_size(); - uint64_t offset = stripeInfo.offset(); - uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength(); - for (int i = 0; i < num_streams; i++) { - const proto::Stream& stream = currentStripeFooter.streams(i); - StreamKind streamKind = static_cast<StreamKind>(stream.kind()); - uint64_t length = static_cast<uint64_t>(stream.length()); - if (streamKind == StreamKind::StreamKind_ROW_INDEX) { - if (offset + length > indexEnd) { - std::stringstream msg; - msg << "Malformed RowIndex stream meta in stripe " << stripeIndex - << ": streamOffset=" << offset << ", streamLength=" << length - << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength=" - << stripeInfo.indexlength(); - throw ParseError(msg.str()); - } - std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(contents->compression, - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream(contents->stream.get(), - offset, - length, - *contents->pool)), - contents->blockSize, - *(contents->pool)); - - proto::RowIndex rowIndex; - if (!rowIndex.ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError("Failed to parse RowIndex from stripe footer"); - } - int num_entries = rowIndex.entry_size(); - size_t column = static_cast<size_t>(stream.column()); - for (int j = 0; j < num_entries; j++) { - const proto::RowIndexEntry& entry = rowIndex.entry(j); - (*indexStats)[column].push_back(entry.statistics()); - } - } - offset += length; - } - } - - bool ReaderImpl::hasMetadataValue(const std::string& key) const { - for(int i=0; i < footer->metadata_size(); ++i) { - if (footer->metadata(i).name() == TString(key)) { - return true; - } - } - return false; - } - - const Type& ReaderImpl::getType() const { - return *(contents->schema.get()); - } - - std::unique_ptr<StripeStatistics> - ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const { - if (!isMetadataLoaded) { - readMetadata(); - } - if (metadata.get() == nullptr) { - throw std::logic_error("No stripe statistics in file"); - } - size_t num_cols = static_cast<size_t>( - metadata->stripestats( - static_cast<int>(stripeIndex)).colstats_size()); - std::vector<std::vector<proto::ColumnStatistics> > indexStats(num_cols); - - proto::StripeInformation currentStripeInfo = - footer->stripes(static_cast<int>(stripeIndex)); - proto::StripeFooter currentStripeFooter = - getStripeFooter(currentStripeInfo, *contents.get()); - - getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); - - const Timezone& writerTZ = - currentStripeFooter.has_writertimezone() ? 
- getTimezoneByName(currentStripeFooter.writertimezone()) : - getLocalTimezone(); - StatContext statContext(hasCorrectStatistics(), &writerTZ); - return std::unique_ptr<StripeStatistics> - (new StripeStatisticsImpl(metadata->stripestats(static_cast<int>(stripeIndex)), - indexStats, statContext)); - } - - std::unique_ptr<Statistics> ReaderImpl::getStatistics() const { - StatContext statContext(hasCorrectStatistics()); - return std::unique_ptr<Statistics> - (new StatisticsImpl(*footer, statContext)); - } - - std::unique_ptr<ColumnStatistics> - ReaderImpl::getColumnStatistics(uint32_t index) const { - if (index >= static_cast<uint64_t>(footer->statistics_size())) { - throw std::logic_error("column index out of range"); - } - proto::ColumnStatistics col = - footer->statistics(static_cast<int32_t>(index)); - - StatContext statContext(hasCorrectStatistics()); - return std::unique_ptr<ColumnStatistics> (convertColumnStatistics(col, statContext)); - } - - void ReaderImpl::readMetadata() const { - uint64_t metadataSize = contents->postscript->metadatalength(); - uint64_t footerLength = contents->postscript->footerlength(); - if (fileLength < metadataSize + footerLength + postscriptLength + 1) { - std::stringstream msg; - msg << "Invalid Metadata length: fileLength=" << fileLength - << ", metadataLength=" << metadataSize << ", footerLength=" << footerLength - << ", postscriptLength=" << postscriptLength; - throw ParseError(msg.str()); - } - uint64_t metadataStart = fileLength - metadataSize - footerLength - postscriptLength - 1; - if (metadataSize != 0) { - std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(contents->compression, - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream(contents->stream.get(), - metadataStart, - metadataSize, - *contents->pool)), - contents->blockSize, - *contents->pool); - metadata.reset(new proto::Metadata()); - if (!metadata->ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError("Failed to parse the metadata"); - } - } - isMetadataLoaded = true; - } - - bool ReaderImpl::hasCorrectStatistics() const { - return !WriterVersionImpl::VERSION_HIVE_8732().compareGT(getWriterVersion()); - } - - void ReaderImpl::checkOrcVersion() { - FileVersion version = getFormatVersion(); - if (version != FileVersion(0, 11) && version != FileVersion(0, 12)) { - *(options.getErrorStream()) - << "Warning: ORC file " << contents->stream->getName() - << " was written in an unknown format version " - << version.toString() << "\n"; - } - } - - std::unique_ptr<RowReader> ReaderImpl::createRowReader() const { - RowReaderOptions defaultOpts; - return createRowReader(defaultOpts); - } - - std::unique_ptr<RowReader> ReaderImpl::createRowReader( - const RowReaderOptions& opts) const { - return std::unique_ptr<RowReader>(new RowReaderImpl(contents, opts)); - } - - uint64_t maxStreamsForType(const proto::Type& type) { - switch (static_cast<int64_t>(type.kind())) { - case proto::Type_Kind_STRUCT: - return 1; - case proto::Type_Kind_INT: - case proto::Type_Kind_LONG: - case proto::Type_Kind_SHORT: - case proto::Type_Kind_FLOAT: - case proto::Type_Kind_DOUBLE: - case proto::Type_Kind_BOOLEAN: - case proto::Type_Kind_BYTE: - case proto::Type_Kind_DATE: - case proto::Type_Kind_LIST: - case proto::Type_Kind_MAP: - case proto::Type_Kind_UNION: - return 2; - case proto::Type_Kind_BINARY: - case proto::Type_Kind_DECIMAL: - case proto::Type_Kind_TIMESTAMP: - return 3; - case proto::Type_Kind_CHAR: - case proto::Type_Kind_STRING: - case proto::Type_Kind_VARCHAR: - return 
4; - default: - return 0; - } - } - - uint64_t ReaderImpl::getMemoryUse(int stripeIx) { - std::vector<bool> selectedColumns; - selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), true); - return getMemoryUse(stripeIx, selectedColumns); - } - - uint64_t ReaderImpl::getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx) { - std::vector<bool> selectedColumns; - selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); - if (contents->schema->getKind() == STRUCT && include.begin() != include.end()) { - for(std::list<uint64_t>::const_iterator field = include.begin(); - field != include.end(); ++field) { - column_selector.updateSelectedByFieldId(selectedColumns, *field); - } - } else { - // default is to select all columns - std::fill(selectedColumns.begin(), selectedColumns.end(), true); - } - column_selector.selectParents(selectedColumns, *contents->schema.get()); - selectedColumns[0] = true; // column 0 is selected by default - return getMemoryUse(stripeIx, selectedColumns); - } - - uint64_t ReaderImpl::getMemoryUseByName(const std::list<std::string>& names, int stripeIx) { - std::vector<bool> selectedColumns; - selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); - if (contents->schema->getKind() == STRUCT && names.begin() != names.end()) { - for(std::list<std::string>::const_iterator field = names.begin(); - field != names.end(); ++field) { - column_selector.updateSelectedByName(selectedColumns, *field); - } - } else { - // default is to select all columns - std::fill(selectedColumns.begin(), selectedColumns.end(), true); - } - column_selector.selectParents(selectedColumns, *contents->schema.get()); - selectedColumns[0] = true; // column 0 is selected by default - return getMemoryUse(stripeIx, selectedColumns); - } - - uint64_t ReaderImpl::getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx) { - std::vector<bool> selectedColumns; - selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); - if (include.begin() != include.end()) { - for(std::list<uint64_t>::const_iterator field = include.begin(); - field != include.end(); ++field) { - column_selector.updateSelectedByTypeId(selectedColumns, *field); - } - } else { - // default is to select all columns - std::fill(selectedColumns.begin(), selectedColumns.end(), true); - } - column_selector.selectParents(selectedColumns, *contents->schema.get()); - selectedColumns[0] = true; // column 0 is selected by default - return getMemoryUse(stripeIx, selectedColumns); - } - - uint64_t ReaderImpl::getMemoryUse(int stripeIx, std::vector<bool>& selectedColumns) { - uint64_t maxDataLength = 0; - - if (stripeIx >= 0 && stripeIx < footer->stripes_size()) { - uint64_t stripe = footer->stripes(stripeIx).datalength(); - if (maxDataLength < stripe) { - maxDataLength = stripe; - } - } else { - for (int i=0; i < footer->stripes_size(); i++) { - uint64_t stripe = footer->stripes(i).datalength(); - if (maxDataLength < stripe) { - maxDataLength = stripe; - } - } - } - - bool hasStringColumn = false; - uint64_t nSelectedStreams = 0; - for (int i=0; !hasStringColumn && i < footer->types_size(); i++) { - if (selectedColumns[static_cast<size_t>(i)]) { - const proto::Type& type = footer->types(i); - nSelectedStreams += maxStreamsForType(type) ; - switch 
(static_cast<int64_t>(type.kind())) { - case proto::Type_Kind_CHAR: - case proto::Type_Kind_STRING: - case proto::Type_Kind_VARCHAR: - case proto::Type_Kind_BINARY: { - hasStringColumn = true; - break; - } - default: { - break; - } - } - } - } - - /* If a string column is read, use stripe datalength as a memory estimate - * because we don't know the dictionary size. Multiply by 2 because - * a string column requires two buffers: - * in the input stream and in the seekable input stream. - * If no string column is read, estimate from the number of streams. - */ - uint64_t memory = hasStringColumn ? 2 * maxDataLength : - std::min(uint64_t(maxDataLength), - nSelectedStreams * contents->stream->getNaturalReadSize()); - - // Do we need even more memory to read the footer or the metadata? - if (memory < contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS) { - memory = contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS; - } - if (memory < contents->postscript->metadatalength()) { - memory = contents->postscript->metadatalength(); - } - - // Account for firstRowOfStripe. - memory += static_cast<uint64_t>(footer->stripes_size()) * sizeof(uint64_t); - - // Decompressors need buffers for each stream - uint64_t decompressorMemory = 0; - if (contents->compression != CompressionKind_NONE) { - for (int i=0; i < footer->types_size(); i++) { - if (selectedColumns[static_cast<size_t>(i)]) { - const proto::Type& type = footer->types(i); - decompressorMemory += maxStreamsForType(type) * contents->blockSize; - } - } - if (contents->compression == CompressionKind_SNAPPY) { - decompressorMemory *= 2; // Snappy decompressor uses a second buffer - } - } - - return memory + decompressorMemory ; - } - - void RowReaderImpl::startNextStripe() { - reader.reset(); // ColumnReaders use lots of memory; free old memory first - currentStripeInfo = footer->stripes(static_cast<int>(currentStripe)); - uint64_t fileLength = contents->stream->getLength(); - if (currentStripeInfo.offset() + currentStripeInfo.indexlength() + - currentStripeInfo.datalength() + currentStripeInfo.footerlength() >= fileLength) { - std::stringstream msg; - msg << "Malformed StripeInformation at stripe index " << currentStripe << ": fileLength=" - << fileLength << ", StripeInfo=(offset=" << currentStripeInfo.offset() << ", indexLength=" - << currentStripeInfo.indexlength() << ", dataLength=" << currentStripeInfo.datalength() - << ", footerLength=" << currentStripeInfo.footerlength() << ")"; - throw ParseError(msg.str()); - } - currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get()); - rowsInCurrentStripe = currentStripeInfo.numberofrows(); - const Timezone& writerTimezone = - currentStripeFooter.has_writertimezone() ? 
- getTimezoneByName(currentStripeFooter.writertimezone()) : - localTimezone; - StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, - currentStripeFooter, - currentStripeInfo.offset(), - *(contents->stream.get()), - writerTimezone); - reader = buildReader(*contents->schema.get(), stripeStreams); - } - - bool RowReaderImpl::next(ColumnVectorBatch& data) { - if (currentStripe >= lastStripe) { - data.numElements = 0; - if (lastStripe > 0) { - previousRow = firstRowOfStripe[lastStripe - 1] + - footer->stripes(static_cast<int>(lastStripe - 1)).numberofrows(); - } else { - previousRow = 0; - } - return false; - } - if (currentRowInStripe == 0) { - startNextStripe(); - } - uint64_t rowsToRead = - std::min(static_cast<uint64_t>(data.capacity), - rowsInCurrentStripe - currentRowInStripe); - data.numElements = rowsToRead; - if (enableEncodedBlock) { - reader->nextEncoded(data, rowsToRead, nullptr); - } - else { - reader->next(data, rowsToRead, nullptr); - } - // update row number - previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe; - currentRowInStripe += rowsToRead; - if (currentRowInStripe >= rowsInCurrentStripe) { - currentStripe += 1; - currentRowInStripe = 0; - } - return rowsToRead != 0; - } - - std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch - (uint64_t capacity) const { - return getSelectedType().createRowBatch(capacity, *contents->pool, enableEncodedBlock); - } - - void ensureOrcFooter(InputStream* stream, - DataBuffer<char> *buffer, - uint64_t postscriptLength) { - - const std::string MAGIC("ORC"); - const uint64_t magicLength = MAGIC.length(); - const char * const bufferStart = buffer->data(); - const uint64_t bufferLength = buffer->size(); - - if (postscriptLength < magicLength || bufferLength < magicLength) { - throw ParseError("Invalid ORC postscript length"); - } - const char* magicStart = bufferStart + bufferLength - 1 - magicLength; - - // Look for the magic string at the end of the postscript. - if (memcmp(magicStart, MAGIC.c_str(), magicLength) != 0) { - // If there is no magic string at the end, check the beginning. - // Only files written by Hive 0.11.0 don't have the tail ORC string. - std::unique_ptr<char[]> frontBuffer( new char[magicLength] ); - stream->read(frontBuffer.get(), magicLength, 0); - bool foundMatch = memcmp(frontBuffer.get(), MAGIC.c_str(), magicLength) == 0; - - if (!foundMatch) { - throw ParseError("Not an ORC file"); - } - } - } - - /** - * Read the file's postscript from the given buffer. - * @param stream the file stream - * @param buffer the buffer with the tail of the file. 
- * @param postscriptSize the length of postscript in bytes - */ - std::unique_ptr<proto::PostScript> readPostscript(InputStream *stream, - DataBuffer<char> *buffer, - uint64_t postscriptSize) { - char *ptr = buffer->data(); - uint64_t readSize = buffer->size(); - - ensureOrcFooter(stream, buffer, postscriptSize); - - std::unique_ptr<proto::PostScript> postscript = - std::unique_ptr<proto::PostScript>(new proto::PostScript()); - if (readSize < 1 + postscriptSize) { - std::stringstream msg; - msg << "Invalid ORC postscript length: " << postscriptSize << ", file length = " - << stream->getLength(); - throw ParseError(msg.str()); - } - if (!postscript->ParseFromArray(ptr + readSize - 1 - postscriptSize, - static_cast<int>(postscriptSize))) { - throw ParseError("Failed to parse the postscript from " + - stream->getName()); - } - return REDUNDANT_MOVE(postscript); - } - - /** - * Check that indices in the type tree are valid, so we won't crash - * when we convert the proto::Types to TypeImpls. - */ - void checkProtoTypeIds(const proto::Footer &footer) { - std::stringstream msg; - int maxId = footer.types_size(); - if (maxId <= 0) { - throw ParseError("Footer is corrupt: no types found"); - } - for (int i = 0; i < maxId; ++i) { - const proto::Type& type = footer.types(i); - for (int j = 0; j < type.subtypes_size(); ++j) { - int subTypeId = static_cast<int>(type.subtypes(j)); - if (subTypeId <= i) { - msg << "Footer is corrupt: malformed link from type " << i << " to " - << subTypeId; - throw ParseError(msg.str()); - } - if (subTypeId >= maxId) { - msg << "Footer is corrupt: types(" << subTypeId << ") not exists"; - throw ParseError(msg.str()); - } - if (j > 0 && static_cast<int>(type.subtypes(j - 1)) >= subTypeId) { - msg << "Footer is corrupt: subType(" << (j-1) << ") >= subType(" << j - << ") in types(" << i << "). (" << type.subtypes(j - 1) << " >= " - << subTypeId << ")"; - throw ParseError(msg.str()); - } - } - } - } - - /** - * Parse the footer from the given buffer. 
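   * (Editorial note, not in the upstream comment: the ORC file tail is laid
   * out as metadata, footer, postscript, and a final byte holding the
   * postscript length, so the footer occupies the footerlength() bytes that
   * immediately precede the postscript.)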
- * @param stream the file's stream - * @param buffer the buffer to parse the footer from - * @param footerOffset the offset within the buffer that contains the footer - * @param ps the file's postscript - * @param memoryPool the memory pool to use - */ - std::unique_ptr<proto::Footer> readFooter(InputStream* stream, - const DataBuffer<char> *buffer, - uint64_t footerOffset, - const proto::PostScript& ps, - MemoryPool& memoryPool) { - const char *footerPtr = buffer->data() + footerOffset; - - std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(convertCompressionKind(ps), - std::unique_ptr<SeekableInputStream> - (new SeekableArrayInputStream(footerPtr, - ps.footerlength())), - getCompressionBlockSize(ps), - memoryPool); - - std::unique_ptr<proto::Footer> footer = - std::unique_ptr<proto::Footer>(new proto::Footer()); - if (!footer->ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError("Failed to parse the footer from " + - stream->getName()); - } - - checkProtoTypeIds(*footer); - return REDUNDANT_MOVE(footer); - } - - std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream, - const ReaderOptions& options) { - std::shared_ptr<FileContents> contents = std::shared_ptr<FileContents>(new FileContents()); - contents->pool = options.getMemoryPool(); - contents->errorStream = options.getErrorStream(); - std::string serializedFooter = options.getSerializedFileTail(); - uint64_t fileLength; - uint64_t postscriptLength; - if (serializedFooter.length() != 0) { - // Parse the file tail from the serialized one. - proto::FileTail tail; - if (!tail.ParseFromString(TString(serializedFooter))) { - throw ParseError("Failed to parse the file tail from string"); - } - contents->postscript.reset(new proto::PostScript(tail.postscript())); - contents->footer.reset(new proto::Footer(tail.footer())); - fileLength = tail.filelength(); - postscriptLength = tail.postscriptlength(); - } else { - // figure out the size of the file using the option or filesystem - fileLength = std::min(options.getTailLocation(), - static_cast<uint64_t>(stream->getLength())); - - //read last bytes into buffer to get PostScript - uint64_t readSize = std::min(fileLength, DIRECTORY_SIZE_GUESS); - if (readSize < 4) { - throw ParseError("File size too small"); - } - std::unique_ptr<DataBuffer<char>> buffer( new DataBuffer<char>(*contents->pool, readSize) ); - stream->read(buffer->data(), readSize, fileLength - readSize); - - postscriptLength = buffer->data()[readSize - 1] & 0xff; - contents->postscript = REDUNDANT_MOVE(readPostscript(stream.get(), - buffer.get(), postscriptLength)); - uint64_t footerSize = contents->postscript->footerlength(); - uint64_t tailSize = 1 + postscriptLength + footerSize; - if (tailSize >= fileLength) { - std::stringstream msg; - msg << "Invalid ORC tailSize=" << tailSize << ", fileLength=" << fileLength; - throw ParseError(msg.str()); - } - uint64_t footerOffset; - - if (tailSize > readSize) { - buffer->resize(footerSize); - stream->read(buffer->data(), footerSize, fileLength - tailSize); - footerOffset = 0; - } else { - footerOffset = readSize - tailSize; - } - - contents->footer = REDUNDANT_MOVE(readFooter(stream.get(), buffer.get(), - footerOffset, *contents->postscript, *contents->pool)); - } - contents->stream = std::move(stream); - return std::unique_ptr<Reader>(new ReaderImpl(std::move(contents), - options, - fileLength, - postscriptLength)); - } - - std::map<uint32_t, BloomFilterIndex> - ReaderImpl::getBloomFilters(uint32_t stripeIndex, - const std::set<uint32_t>& 
included) const { - std::map<uint32_t, BloomFilterIndex> ret; - - // find stripe info - if (stripeIndex >= static_cast<uint32_t>(footer->stripes_size())) { - throw std::logic_error("Illegal stripe index: " + to_string(static_cast<int64_t>(stripeIndex))); - } - const proto::StripeInformation currentStripeInfo = - footer->stripes(static_cast<int>(stripeIndex)); - const proto::StripeFooter currentStripeFooter = - getStripeFooter(currentStripeInfo, *contents); - - // iterate stripe footer to get stream of bloomfilter - uint64_t offset = static_cast<uint64_t>(currentStripeInfo.offset()); - for (int i = 0; i < currentStripeFooter.streams_size(); i++) { - const proto::Stream& stream = currentStripeFooter.streams(i); - uint32_t column = static_cast<uint32_t>(stream.column()); - uint64_t length = static_cast<uint64_t>(stream.length()); - - // a bloom filter stream from a selected column is found - if (stream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8 && - (included.empty() || included.find(column) != included.end())) { - - std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(contents->compression, - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream(contents->stream.get(), - offset, - length, - *contents->pool)), - contents->blockSize, - *(contents->pool)); - - proto::BloomFilterIndex pbBFIndex; - if (!pbBFIndex.ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError("Failed to parse BloomFilterIndex"); - } - - BloomFilterIndex bfIndex; - for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) { - std::unique_ptr<BloomFilter> entry = BloomFilterUTF8Utils::deserialize( - stream.kind(), - currentStripeFooter.columns(static_cast<int>(stream.column())), - pbBFIndex.bloomfilter(j)); - bfIndex.entries.push_back(std::shared_ptr<BloomFilter>(std::move(entry))); - } - - // add bloom filters to result for one column - ret[column] = bfIndex; - } - - offset += length; - } - - return ret; - } - - RowReader::~RowReader() { - // PASS - } - - Reader::~Reader() { - // PASS - } - - InputStream::~InputStream() { - // PASS - }; - - - -}// namespace + WriterVersion ReaderImpl::getWriterVersion() const { + if (!contents->postscript->has_writerversion()) { + return WriterVersion_ORIGINAL; + } + return static_cast<WriterVersion>(contents->postscript->writerversion()); + } + + uint64_t ReaderImpl::getContentLength() const { + return footer->contentlength(); + } + + uint64_t ReaderImpl::getStripeStatisticsLength() const { + return contents->postscript->metadatalength(); + } + + uint64_t ReaderImpl::getFileFooterLength() const { + return contents->postscript->footerlength(); + } + + uint64_t ReaderImpl::getFilePostscriptLength() const { + return postscriptLength; + } + + uint64_t ReaderImpl::getFileLength() const { + return fileLength; + } + + uint64_t ReaderImpl::getRowIndexStride() const { + return footer->rowindexstride(); + } + + const std::string& ReaderImpl::getStreamName() const { + return contents->stream->getName(); + } + + std::list<std::string> ReaderImpl::getMetadataKeys() const { + std::list<std::string> result; + for(int i=0; i < footer->metadata_size(); ++i) { + result.push_back(footer->metadata(i).name()); + } + return result; + } + + std::string ReaderImpl::getMetadataValue(const std::string& key) const { + for(int i=0; i < footer->metadata_size(); ++i) { + if (footer->metadata(i).name() == TString(key)) { + return footer->metadata(i).value(); + } + } + throw std::range_error("key not found"); + } + + void ReaderImpl::getRowIndexStatistics(const 
proto::StripeInformation& stripeInfo, + uint64_t stripeIndex, const proto::StripeFooter& currentStripeFooter, + std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const { + int num_streams = currentStripeFooter.streams_size(); + uint64_t offset = stripeInfo.offset(); + uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength(); + for (int i = 0; i < num_streams; i++) { + const proto::Stream& stream = currentStripeFooter.streams(i); + StreamKind streamKind = static_cast<StreamKind>(stream.kind()); + uint64_t length = static_cast<uint64_t>(stream.length()); + if (streamKind == StreamKind::StreamKind_ROW_INDEX) { + if (offset + length > indexEnd) { + std::stringstream msg; + msg << "Malformed RowIndex stream meta in stripe " << stripeIndex + << ": streamOffset=" << offset << ", streamLength=" << length + << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength=" + << stripeInfo.indexlength(); + throw ParseError(msg.str()); + } + std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(contents->compression, + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream(contents->stream.get(), + offset, + length, + *contents->pool)), + contents->blockSize, + *(contents->pool)); + + proto::RowIndex rowIndex; + if (!rowIndex.ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError("Failed to parse RowIndex from stripe footer"); + } + int num_entries = rowIndex.entry_size(); + size_t column = static_cast<size_t>(stream.column()); + for (int j = 0; j < num_entries; j++) { + const proto::RowIndexEntry& entry = rowIndex.entry(j); + (*indexStats)[column].push_back(entry.statistics()); + } + } + offset += length; + } + } + + bool ReaderImpl::hasMetadataValue(const std::string& key) const { + for(int i=0; i < footer->metadata_size(); ++i) { + if (footer->metadata(i).name() == TString(key)) { + return true; + } + } + return false; + } + + const Type& ReaderImpl::getType() const { + return *(contents->schema.get()); + } + + std::unique_ptr<StripeStatistics> + ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const { + if (!isMetadataLoaded) { + readMetadata(); + } + if (metadata.get() == nullptr) { + throw std::logic_error("No stripe statistics in file"); + } + size_t num_cols = static_cast<size_t>( + metadata->stripestats( + static_cast<int>(stripeIndex)).colstats_size()); + std::vector<std::vector<proto::ColumnStatistics> > indexStats(num_cols); + + proto::StripeInformation currentStripeInfo = + footer->stripes(static_cast<int>(stripeIndex)); + proto::StripeFooter currentStripeFooter = + getStripeFooter(currentStripeInfo, *contents.get()); + + getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); + + const Timezone& writerTZ = + currentStripeFooter.has_writertimezone() ? 
+ getTimezoneByName(currentStripeFooter.writertimezone()) : + getLocalTimezone(); + StatContext statContext(hasCorrectStatistics(), &writerTZ); + return std::unique_ptr<StripeStatistics> + (new StripeStatisticsImpl(metadata->stripestats(static_cast<int>(stripeIndex)), + indexStats, statContext)); + } + + std::unique_ptr<Statistics> ReaderImpl::getStatistics() const { + StatContext statContext(hasCorrectStatistics()); + return std::unique_ptr<Statistics> + (new StatisticsImpl(*footer, statContext)); + } + + std::unique_ptr<ColumnStatistics> + ReaderImpl::getColumnStatistics(uint32_t index) const { + if (index >= static_cast<uint64_t>(footer->statistics_size())) { + throw std::logic_error("column index out of range"); + } + proto::ColumnStatistics col = + footer->statistics(static_cast<int32_t>(index)); + + StatContext statContext(hasCorrectStatistics()); + return std::unique_ptr<ColumnStatistics> (convertColumnStatistics(col, statContext)); + } + + void ReaderImpl::readMetadata() const { + uint64_t metadataSize = contents->postscript->metadatalength(); + uint64_t footerLength = contents->postscript->footerlength(); + if (fileLength < metadataSize + footerLength + postscriptLength + 1) { + std::stringstream msg; + msg << "Invalid Metadata length: fileLength=" << fileLength + << ", metadataLength=" << metadataSize << ", footerLength=" << footerLength + << ", postscriptLength=" << postscriptLength; + throw ParseError(msg.str()); + } + uint64_t metadataStart = fileLength - metadataSize - footerLength - postscriptLength - 1; + if (metadataSize != 0) { + std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(contents->compression, + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream(contents->stream.get(), + metadataStart, + metadataSize, + *contents->pool)), + contents->blockSize, + *contents->pool); + metadata.reset(new proto::Metadata()); + if (!metadata->ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError("Failed to parse the metadata"); + } + } + isMetadataLoaded = true; + } + + bool ReaderImpl::hasCorrectStatistics() const { + return !WriterVersionImpl::VERSION_HIVE_8732().compareGT(getWriterVersion()); + } + + void ReaderImpl::checkOrcVersion() { + FileVersion version = getFormatVersion(); + if (version != FileVersion(0, 11) && version != FileVersion(0, 12)) { + *(options.getErrorStream()) + << "Warning: ORC file " << contents->stream->getName() + << " was written in an unknown format version " + << version.toString() << "\n"; + } + } + + std::unique_ptr<RowReader> ReaderImpl::createRowReader() const { + RowReaderOptions defaultOpts; + return createRowReader(defaultOpts); + } + + std::unique_ptr<RowReader> ReaderImpl::createRowReader( + const RowReaderOptions& opts) const { + return std::unique_ptr<RowReader>(new RowReaderImpl(contents, opts)); + } + + uint64_t maxStreamsForType(const proto::Type& type) { + switch (static_cast<int64_t>(type.kind())) { + case proto::Type_Kind_STRUCT: + return 1; + case proto::Type_Kind_INT: + case proto::Type_Kind_LONG: + case proto::Type_Kind_SHORT: + case proto::Type_Kind_FLOAT: + case proto::Type_Kind_DOUBLE: + case proto::Type_Kind_BOOLEAN: + case proto::Type_Kind_BYTE: + case proto::Type_Kind_DATE: + case proto::Type_Kind_LIST: + case proto::Type_Kind_MAP: + case proto::Type_Kind_UNION: + return 2; + case proto::Type_Kind_BINARY: + case proto::Type_Kind_DECIMAL: + case proto::Type_Kind_TIMESTAMP: + return 3; + case proto::Type_Kind_CHAR: + case proto::Type_Kind_STRING: + case proto::Type_Kind_VARCHAR: + return 
4; + default: + return 0; + } + } + + uint64_t ReaderImpl::getMemoryUse(int stripeIx) { + std::vector<bool> selectedColumns; + selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), true); + return getMemoryUse(stripeIx, selectedColumns); + } + + uint64_t ReaderImpl::getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx) { + std::vector<bool> selectedColumns; + selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); + ColumnSelector column_selector(contents.get()); + if (contents->schema->getKind() == STRUCT && include.begin() != include.end()) { + for(std::list<uint64_t>::const_iterator field = include.begin(); + field != include.end(); ++field) { + column_selector.updateSelectedByFieldId(selectedColumns, *field); + } + } else { + // default is to select all columns + std::fill(selectedColumns.begin(), selectedColumns.end(), true); + } + column_selector.selectParents(selectedColumns, *contents->schema.get()); + selectedColumns[0] = true; // column 0 is selected by default + return getMemoryUse(stripeIx, selectedColumns); + } + + uint64_t ReaderImpl::getMemoryUseByName(const std::list<std::string>& names, int stripeIx) { + std::vector<bool> selectedColumns; + selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); + ColumnSelector column_selector(contents.get()); + if (contents->schema->getKind() == STRUCT && names.begin() != names.end()) { + for(std::list<std::string>::const_iterator field = names.begin(); + field != names.end(); ++field) { + column_selector.updateSelectedByName(selectedColumns, *field); + } + } else { + // default is to select all columns + std::fill(selectedColumns.begin(), selectedColumns.end(), true); + } + column_selector.selectParents(selectedColumns, *contents->schema.get()); + selectedColumns[0] = true; // column 0 is selected by default + return getMemoryUse(stripeIx, selectedColumns); + } + + uint64_t ReaderImpl::getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx) { + std::vector<bool> selectedColumns; + selectedColumns.assign(static_cast<size_t>(contents->footer->types_size()), false); + ColumnSelector column_selector(contents.get()); + if (include.begin() != include.end()) { + for(std::list<uint64_t>::const_iterator field = include.begin(); + field != include.end(); ++field) { + column_selector.updateSelectedByTypeId(selectedColumns, *field); + } + } else { + // default is to select all columns + std::fill(selectedColumns.begin(), selectedColumns.end(), true); + } + column_selector.selectParents(selectedColumns, *contents->schema.get()); + selectedColumns[0] = true; // column 0 is selected by default + return getMemoryUse(stripeIx, selectedColumns); + } + + uint64_t ReaderImpl::getMemoryUse(int stripeIx, std::vector<bool>& selectedColumns) { + uint64_t maxDataLength = 0; + + if (stripeIx >= 0 && stripeIx < footer->stripes_size()) { + uint64_t stripe = footer->stripes(stripeIx).datalength(); + if (maxDataLength < stripe) { + maxDataLength = stripe; + } + } else { + for (int i=0; i < footer->stripes_size(); i++) { + uint64_t stripe = footer->stripes(i).datalength(); + if (maxDataLength < stripe) { + maxDataLength = stripe; + } + } + } + + bool hasStringColumn = false; + uint64_t nSelectedStreams = 0; + for (int i=0; !hasStringColumn && i < footer->types_size(); i++) { + if (selectedColumns[static_cast<size_t>(i)]) { + const proto::Type& type = footer->types(i); + nSelectedStreams += maxStreamsForType(type) ; + switch 
(static_cast<int64_t>(type.kind())) { + case proto::Type_Kind_CHAR: + case proto::Type_Kind_STRING: + case proto::Type_Kind_VARCHAR: + case proto::Type_Kind_BINARY: { + hasStringColumn = true; + break; + } + default: { + break; + } + } + } + } + + /* If a string column is read, use stripe datalength as a memory estimate + * because we don't know the dictionary size. Multiply by 2 because + * a string column requires two buffers: + * in the input stream and in the seekable input stream. + * If no string column is read, estimate from the number of streams. + */ + uint64_t memory = hasStringColumn ? 2 * maxDataLength : + std::min(uint64_t(maxDataLength), + nSelectedStreams * contents->stream->getNaturalReadSize()); + + // Do we need even more memory to read the footer or the metadata? + if (memory < contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS) { + memory = contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS; + } + if (memory < contents->postscript->metadatalength()) { + memory = contents->postscript->metadatalength(); + } + + // Account for firstRowOfStripe. + memory += static_cast<uint64_t>(footer->stripes_size()) * sizeof(uint64_t); + + // Decompressors need buffers for each stream + uint64_t decompressorMemory = 0; + if (contents->compression != CompressionKind_NONE) { + for (int i=0; i < footer->types_size(); i++) { + if (selectedColumns[static_cast<size_t>(i)]) { + const proto::Type& type = footer->types(i); + decompressorMemory += maxStreamsForType(type) * contents->blockSize; + } + } + if (contents->compression == CompressionKind_SNAPPY) { + decompressorMemory *= 2; // Snappy decompressor uses a second buffer + } + } + + return memory + decompressorMemory ; + } + + void RowReaderImpl::startNextStripe() { + reader.reset(); // ColumnReaders use lots of memory; free old memory first + currentStripeInfo = footer->stripes(static_cast<int>(currentStripe)); + uint64_t fileLength = contents->stream->getLength(); + if (currentStripeInfo.offset() + currentStripeInfo.indexlength() + + currentStripeInfo.datalength() + currentStripeInfo.footerlength() >= fileLength) { + std::stringstream msg; + msg << "Malformed StripeInformation at stripe index " << currentStripe << ": fileLength=" + << fileLength << ", StripeInfo=(offset=" << currentStripeInfo.offset() << ", indexLength=" + << currentStripeInfo.indexlength() << ", dataLength=" << currentStripeInfo.datalength() + << ", footerLength=" << currentStripeInfo.footerlength() << ")"; + throw ParseError(msg.str()); + } + currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get()); + rowsInCurrentStripe = currentStripeInfo.numberofrows(); + const Timezone& writerTimezone = + currentStripeFooter.has_writertimezone() ? 
+ getTimezoneByName(currentStripeFooter.writertimezone()) : + localTimezone; + StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, + currentStripeFooter, + currentStripeInfo.offset(), + *(contents->stream.get()), + writerTimezone); + reader = buildReader(*contents->schema.get(), stripeStreams); + } + + bool RowReaderImpl::next(ColumnVectorBatch& data) { + if (currentStripe >= lastStripe) { + data.numElements = 0; + if (lastStripe > 0) { + previousRow = firstRowOfStripe[lastStripe - 1] + + footer->stripes(static_cast<int>(lastStripe - 1)).numberofrows(); + } else { + previousRow = 0; + } + return false; + } + if (currentRowInStripe == 0) { + startNextStripe(); + } + uint64_t rowsToRead = + std::min(static_cast<uint64_t>(data.capacity), + rowsInCurrentStripe - currentRowInStripe); + data.numElements = rowsToRead; + if (enableEncodedBlock) { + reader->nextEncoded(data, rowsToRead, nullptr); + } + else { + reader->next(data, rowsToRead, nullptr); + } + // update row number + previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe; + currentRowInStripe += rowsToRead; + if (currentRowInStripe >= rowsInCurrentStripe) { + currentStripe += 1; + currentRowInStripe = 0; + } + return rowsToRead != 0; + } + + std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch + (uint64_t capacity) const { + return getSelectedType().createRowBatch(capacity, *contents->pool, enableEncodedBlock); + } + + void ensureOrcFooter(InputStream* stream, + DataBuffer<char> *buffer, + uint64_t postscriptLength) { + + const std::string MAGIC("ORC"); + const uint64_t magicLength = MAGIC.length(); + const char * const bufferStart = buffer->data(); + const uint64_t bufferLength = buffer->size(); + + if (postscriptLength < magicLength || bufferLength < magicLength) { + throw ParseError("Invalid ORC postscript length"); + } + const char* magicStart = bufferStart + bufferLength - 1 - magicLength; + + // Look for the magic string at the end of the postscript. + if (memcmp(magicStart, MAGIC.c_str(), magicLength) != 0) { + // If there is no magic string at the end, check the beginning. + // Only files written by Hive 0.11.0 don't have the tail ORC string. + std::unique_ptr<char[]> frontBuffer( new char[magicLength] ); + stream->read(frontBuffer.get(), magicLength, 0); + bool foundMatch = memcmp(frontBuffer.get(), MAGIC.c_str(), magicLength) == 0; + + if (!foundMatch) { + throw ParseError("Not an ORC file"); + } + } + } + + /** + * Read the file's postscript from the given buffer. + * @param stream the file stream + * @param buffer the buffer with the tail of the file. 
+ * @param postscriptSize the length of postscript in bytes + */ + std::unique_ptr<proto::PostScript> readPostscript(InputStream *stream, + DataBuffer<char> *buffer, + uint64_t postscriptSize) { + char *ptr = buffer->data(); + uint64_t readSize = buffer->size(); + + ensureOrcFooter(stream, buffer, postscriptSize); + + std::unique_ptr<proto::PostScript> postscript = + std::unique_ptr<proto::PostScript>(new proto::PostScript()); + if (readSize < 1 + postscriptSize) { + std::stringstream msg; + msg << "Invalid ORC postscript length: " << postscriptSize << ", file length = " + << stream->getLength(); + throw ParseError(msg.str()); + } + if (!postscript->ParseFromArray(ptr + readSize - 1 - postscriptSize, + static_cast<int>(postscriptSize))) { + throw ParseError("Failed to parse the postscript from " + + stream->getName()); + } + return REDUNDANT_MOVE(postscript); + } + + /** + * Check that indices in the type tree are valid, so we won't crash + * when we convert the proto::Types to TypeImpls. + */ + void checkProtoTypeIds(const proto::Footer &footer) { + std::stringstream msg; + int maxId = footer.types_size(); + if (maxId <= 0) { + throw ParseError("Footer is corrupt: no types found"); + } + for (int i = 0; i < maxId; ++i) { + const proto::Type& type = footer.types(i); + for (int j = 0; j < type.subtypes_size(); ++j) { + int subTypeId = static_cast<int>(type.subtypes(j)); + if (subTypeId <= i) { + msg << "Footer is corrupt: malformed link from type " << i << " to " + << subTypeId; + throw ParseError(msg.str()); + } + if (subTypeId >= maxId) { + msg << "Footer is corrupt: types(" << subTypeId << ") not exists"; + throw ParseError(msg.str()); + } + if (j > 0 && static_cast<int>(type.subtypes(j - 1)) >= subTypeId) { + msg << "Footer is corrupt: subType(" << (j-1) << ") >= subType(" << j + << ") in types(" << i << "). (" << type.subtypes(j - 1) << " >= " + << subTypeId << ")"; + throw ParseError(msg.str()); + } + } + } + } + + /** + * Parse the footer from the given buffer. 
+ * @param stream the file's stream + * @param buffer the buffer to parse the footer from + * @param footerOffset the offset within the buffer that contains the footer + * @param ps the file's postscript + * @param memoryPool the memory pool to use + */ + std::unique_ptr<proto::Footer> readFooter(InputStream* stream, + const DataBuffer<char> *buffer, + uint64_t footerOffset, + const proto::PostScript& ps, + MemoryPool& memoryPool) { + const char *footerPtr = buffer->data() + footerOffset; + + std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(convertCompressionKind(ps), + std::unique_ptr<SeekableInputStream> + (new SeekableArrayInputStream(footerPtr, + ps.footerlength())), + getCompressionBlockSize(ps), + memoryPool); + + std::unique_ptr<proto::Footer> footer = + std::unique_ptr<proto::Footer>(new proto::Footer()); + if (!footer->ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError("Failed to parse the footer from " + + stream->getName()); + } + + checkProtoTypeIds(*footer); + return REDUNDANT_MOVE(footer); + } + + std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream, + const ReaderOptions& options) { + std::shared_ptr<FileContents> contents = std::shared_ptr<FileContents>(new FileContents()); + contents->pool = options.getMemoryPool(); + contents->errorStream = options.getErrorStream(); + std::string serializedFooter = options.getSerializedFileTail(); + uint64_t fileLength; + uint64_t postscriptLength; + if (serializedFooter.length() != 0) { + // Parse the file tail from the serialized one. + proto::FileTail tail; + if (!tail.ParseFromString(TString(serializedFooter))) { + throw ParseError("Failed to parse the file tail from string"); + } + contents->postscript.reset(new proto::PostScript(tail.postscript())); + contents->footer.reset(new proto::Footer(tail.footer())); + fileLength = tail.filelength(); + postscriptLength = tail.postscriptlength(); + } else { + // figure out the size of the file using the option or filesystem + fileLength = std::min(options.getTailLocation(), + static_cast<uint64_t>(stream->getLength())); + + //read last bytes into buffer to get PostScript + uint64_t readSize = std::min(fileLength, DIRECTORY_SIZE_GUESS); + if (readSize < 4) { + throw ParseError("File size too small"); + } + std::unique_ptr<DataBuffer<char>> buffer( new DataBuffer<char>(*contents->pool, readSize) ); + stream->read(buffer->data(), readSize, fileLength - readSize); + + postscriptLength = buffer->data()[readSize - 1] & 0xff; + contents->postscript = REDUNDANT_MOVE(readPostscript(stream.get(), + buffer.get(), postscriptLength)); + uint64_t footerSize = contents->postscript->footerlength(); + uint64_t tailSize = 1 + postscriptLength + footerSize; + if (tailSize >= fileLength) { + std::stringstream msg; + msg << "Invalid ORC tailSize=" << tailSize << ", fileLength=" << fileLength; + throw ParseError(msg.str()); + } + uint64_t footerOffset; + + if (tailSize > readSize) { + buffer->resize(footerSize); + stream->read(buffer->data(), footerSize, fileLength - tailSize); + footerOffset = 0; + } else { + footerOffset = readSize - tailSize; + } + + contents->footer = REDUNDANT_MOVE(readFooter(stream.get(), buffer.get(), + footerOffset, *contents->postscript, *contents->pool)); + } + contents->stream = std::move(stream); + return std::unique_ptr<Reader>(new ReaderImpl(std::move(contents), + options, + fileLength, + postscriptLength)); + } + + std::map<uint32_t, BloomFilterIndex> + ReaderImpl::getBloomFilters(uint32_t stripeIndex, + const std::set<uint32_t>& 
included) const { + std::map<uint32_t, BloomFilterIndex> ret; + + // find stripe info + if (stripeIndex >= static_cast<uint32_t>(footer->stripes_size())) { + throw std::logic_error("Illegal stripe index: " + to_string(static_cast<int64_t>(stripeIndex))); + } + const proto::StripeInformation currentStripeInfo = + footer->stripes(static_cast<int>(stripeIndex)); + const proto::StripeFooter currentStripeFooter = + getStripeFooter(currentStripeInfo, *contents); + + // iterate stripe footer to get stream of bloomfilter + uint64_t offset = static_cast<uint64_t>(currentStripeInfo.offset()); + for (int i = 0; i < currentStripeFooter.streams_size(); i++) { + const proto::Stream& stream = currentStripeFooter.streams(i); + uint32_t column = static_cast<uint32_t>(stream.column()); + uint64_t length = static_cast<uint64_t>(stream.length()); + + // a bloom filter stream from a selected column is found + if (stream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8 && + (included.empty() || included.find(column) != included.end())) { + + std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(contents->compression, + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream(contents->stream.get(), + offset, + length, + *contents->pool)), + contents->blockSize, + *(contents->pool)); + + proto::BloomFilterIndex pbBFIndex; + if (!pbBFIndex.ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError("Failed to parse BloomFilterIndex"); + } + + BloomFilterIndex bfIndex; + for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) { + std::unique_ptr<BloomFilter> entry = BloomFilterUTF8Utils::deserialize( + stream.kind(), + currentStripeFooter.columns(static_cast<int>(stream.column())), + pbBFIndex.bloomfilter(j)); + bfIndex.entries.push_back(std::shared_ptr<BloomFilter>(std::move(entry))); + } + + // add bloom filters to result for one column + ret[column] = bfIndex; + } + + offset += length; + } + + return ret; + } + + RowReader::~RowReader() { + // PASS + } + + Reader::~Reader() { + // PASS + } + + InputStream::~InputStream() { + // PASS + }; + + + +}// namespace diff --git a/contrib/libs/apache/orc/c++/src/Reader.hh b/contrib/libs/apache/orc/c++/src/Reader.hh index b4ce7f6529..49e9d033d9 100644 --- a/contrib/libs/apache/orc/c++/src/Reader.hh +++ b/contrib/libs/apache/orc/c++/src/Reader.hh @@ -1,155 +1,155 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_READER_IMPL_HH -#define ORC_READER_IMPL_HH - -#include "orc/Int128.hh" -#include "orc/OrcFile.hh" -#include "orc/Reader.hh" - -#include "ColumnReader.hh" -#include "orc/Exceptions.hh" -#include "RLE.hh" -#include "TypeImpl.hh" - -namespace orc { - - static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024; - - /** - * WriterVersion Implementation - */ - class WriterVersionImpl { - private: - WriterVersion version; - public: - // Known Versions with issues resolved - // The static method below is to fix global constructors Clang warning - static const WriterVersionImpl& VERSION_HIVE_8732(); - - WriterVersionImpl(WriterVersion ver) : version(ver) {} - - bool compareGT(const WriterVersion other) const { - return version > other; - } - }; - - /** - * State shared between Reader and Row Reader - */ - struct FileContents { - std::unique_ptr<InputStream> stream; - std::unique_ptr<proto::PostScript> postscript; - std::unique_ptr<proto::Footer> footer; - std::unique_ptr<Type> schema; - uint64_t blockSize; - CompressionKind compression; - MemoryPool *pool; - std::ostream *errorStream; - }; - - proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, - const FileContents& contents); - - class ReaderImpl; - - class ColumnSelector { - private: - std::map<std::string, uint64_t> nameIdMap; - std::map<uint64_t, const Type*> idTypeMap; - const FileContents* contents; - std::vector<std::string> columns; - - // build map from type name and id, id to Type - void buildTypeNameIdMap(const Type* type); - std::string toDotColumnPath(); - - public: - // Select a field by name - void updateSelectedByName(std::vector<bool>& selectedColumns, const std::string& name); - // Select a field by id - void updateSelectedByFieldId(std::vector<bool>& selectedColumns, uint64_t fieldId); - // Select a type by id - void updateSelectedByTypeId(std::vector<bool>& selectedColumns, uint64_t typeId); - - // Select all of the recursive children of the given type. - void selectChildren(std::vector<bool>& selectedColumns, const Type& type); - - // For each child of type, select it if one of its children - // is selected. - bool selectParents(std::vector<bool>& selectedColumns, const Type& type); - /** - * Constructor that selects columns. - * @param contents of the file - */ - ColumnSelector(const FileContents* contents); - - // Select the columns from the RowReaderoptions object - void updateSelected(std::vector<bool>& selectedColumns, const RowReaderOptions& options); - - // Select the columns from the Readeroptions object - void updateSelected(std::vector<bool>& selectedColumns, const ReaderOptions& options); - }; - - - class RowReaderImpl : public RowReader { - private: - const Timezone& localTimezone; - - // contents - std::shared_ptr<FileContents> contents; - const bool throwOnHive11DecimalOverflow; - const int32_t forcedScaleOnHive11Decimal; - - // inputs - std::vector<bool> selectedColumns; - - // footer - proto::Footer* footer; - DataBuffer<uint64_t> firstRowOfStripe; - mutable std::unique_ptr<Type> selectedSchema; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_READER_IMPL_HH +#define ORC_READER_IMPL_HH + +#include "orc/Int128.hh" +#include "orc/OrcFile.hh" +#include "orc/Reader.hh" + +#include "ColumnReader.hh" +#include "orc/Exceptions.hh" +#include "RLE.hh" +#include "TypeImpl.hh" + +namespace orc { + + static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024; + + /** + * WriterVersion Implementation + */ + class WriterVersionImpl { + private: + WriterVersion version; + public: + // Known Versions with issues resolved + // The static method below is to fix global constructors Clang warning + static const WriterVersionImpl& VERSION_HIVE_8732(); + + WriterVersionImpl(WriterVersion ver) : version(ver) {} + + bool compareGT(const WriterVersion other) const { + return version > other; + } + }; + + /** + * State shared between Reader and Row Reader + */ + struct FileContents { + std::unique_ptr<InputStream> stream; + std::unique_ptr<proto::PostScript> postscript; + std::unique_ptr<proto::Footer> footer; + std::unique_ptr<Type> schema; + uint64_t blockSize; + CompressionKind compression; + MemoryPool *pool; + std::ostream *errorStream; + }; + + proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, + const FileContents& contents); + + class ReaderImpl; + + class ColumnSelector { + private: + std::map<std::string, uint64_t> nameIdMap; + std::map<uint64_t, const Type*> idTypeMap; + const FileContents* contents; + std::vector<std::string> columns; + + // build map from type name and id, id to Type + void buildTypeNameIdMap(const Type* type); + std::string toDotColumnPath(); + + public: + // Select a field by name + void updateSelectedByName(std::vector<bool>& selectedColumns, const std::string& name); + // Select a field by id + void updateSelectedByFieldId(std::vector<bool>& selectedColumns, uint64_t fieldId); + // Select a type by id + void updateSelectedByTypeId(std::vector<bool>& selectedColumns, uint64_t typeId); + + // Select all of the recursive children of the given type. + void selectChildren(std::vector<bool>& selectedColumns, const Type& type); + + // For each child of type, select it if one of its children + // is selected. + bool selectParents(std::vector<bool>& selectedColumns, const Type& type); + /** + * Constructor that selects columns. 
+ * @param contents of the file + */ + ColumnSelector(const FileContents* contents); + + // Select the columns from the RowReaderoptions object + void updateSelected(std::vector<bool>& selectedColumns, const RowReaderOptions& options); + + // Select the columns from the Readeroptions object + void updateSelected(std::vector<bool>& selectedColumns, const ReaderOptions& options); + }; + + + class RowReaderImpl : public RowReader { + private: + const Timezone& localTimezone; + + // contents + std::shared_ptr<FileContents> contents; + const bool throwOnHive11DecimalOverflow; + const int32_t forcedScaleOnHive11Decimal; + + // inputs + std::vector<bool> selectedColumns; + + // footer + proto::Footer* footer; + DataBuffer<uint64_t> firstRowOfStripe; + mutable std::unique_ptr<Type> selectedSchema; bool skipBloomFilters; - - // reading state - uint64_t previousRow; - uint64_t firstStripe; - uint64_t currentStripe; - uint64_t lastStripe; // the stripe AFTER the last one - uint64_t currentRowInStripe; - uint64_t rowsInCurrentStripe; - proto::StripeInformation currentStripeInfo; - proto::StripeFooter currentStripeFooter; - std::unique_ptr<ColumnReader> reader; - - bool enableEncodedBlock; - // internal methods - void startNextStripe(); - - // row index of current stripe with column id as the key - std::unordered_map<uint64_t, proto::RowIndex> rowIndexes; - - /** - * Seek to the start of a row group in the current stripe - * @param rowGroupEntryId the row group id to seek to - */ - void seekToRowGroup(uint32_t rowGroupEntryId); - + + // reading state + uint64_t previousRow; + uint64_t firstStripe; + uint64_t currentStripe; + uint64_t lastStripe; // the stripe AFTER the last one + uint64_t currentRowInStripe; + uint64_t rowsInCurrentStripe; + proto::StripeInformation currentStripeInfo; + proto::StripeFooter currentStripeFooter; + std::unique_ptr<ColumnReader> reader; + + bool enableEncodedBlock; + // internal methods + void startNextStripe(); + + // row index of current stripe with column id as the key + std::unordered_map<uint64_t, proto::RowIndex> rowIndexes; + + /** + * Seek to the start of a row group in the current stripe + * @param rowGroupEntryId the row group id to seek to + */ + void seekToRowGroup(uint32_t rowGroupEntryId); + /** * Check if the file has bad bloom filters. We will skip using them in the * following reads. @@ -157,159 +157,159 @@ namespace orc { */ bool hasBadBloomFilters(); - public: - /** - * Constructor that lets the user specify additional options. 
- * @param contents of the file - * @param options options for reading - */ - RowReaderImpl(std::shared_ptr<FileContents> contents, - const RowReaderOptions& options); - - // Select the columns from the options object - void updateSelected(); - const std::vector<bool> getSelectedColumns() const override; - - const Type& getSelectedType() const override; - - std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size - ) const override; - - bool next(ColumnVectorBatch& data) override; - - CompressionKind getCompression() const; - - uint64_t getCompressionSize() const; - - uint64_t getRowNumber() const override; - - void seekToRow(uint64_t rowNumber) override; - - const FileContents& getFileContents() const; - bool getThrowOnHive11DecimalOverflow() const; - int32_t getForcedScaleOnHive11Decimal() const; - }; - - class ReaderImpl : public Reader { - private: - // FileContents - std::shared_ptr<FileContents> contents; - - // inputs - const ReaderOptions options; - const uint64_t fileLength; - const uint64_t postscriptLength; - - // footer - proto::Footer* footer; - uint64_t numberOfStripes; - uint64_t getMemoryUse(int stripeIx, std::vector<bool>& selectedColumns); - - // internal methods - void readMetadata() const; - void checkOrcVersion(); - void getRowIndexStatistics(const proto::StripeInformation& stripeInfo, uint64_t stripeIndex, - const proto::StripeFooter& currentStripeFooter, - std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const; - - // metadata - mutable std::unique_ptr<proto::Metadata> metadata; - mutable bool isMetadataLoaded; - public: - /** - * Constructor that lets the user specify additional options. - * @param contents of the file - * @param options options for reading - * @param fileLength the length of the file in bytes - * @param postscriptLength the length of the postscript in bytes - */ - ReaderImpl(std::shared_ptr<FileContents> contents, - const ReaderOptions& options, - uint64_t fileLength, - uint64_t postscriptLength); - - const ReaderOptions& getReaderOptions() const; - - CompressionKind getCompression() const override; - - FileVersion getFormatVersion() const override; - - WriterId getWriterId() const override; - - uint32_t getWriterIdValue() const override; - + public: + /** + * Constructor that lets the user specify additional options. 
+ * @param contents of the file + * @param options options for reading + */ + RowReaderImpl(std::shared_ptr<FileContents> contents, + const RowReaderOptions& options); + + // Select the columns from the options object + void updateSelected(); + const std::vector<bool> getSelectedColumns() const override; + + const Type& getSelectedType() const override; + + std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size + ) const override; + + bool next(ColumnVectorBatch& data) override; + + CompressionKind getCompression() const; + + uint64_t getCompressionSize() const; + + uint64_t getRowNumber() const override; + + void seekToRow(uint64_t rowNumber) override; + + const FileContents& getFileContents() const; + bool getThrowOnHive11DecimalOverflow() const; + int32_t getForcedScaleOnHive11Decimal() const; + }; + + class ReaderImpl : public Reader { + private: + // FileContents + std::shared_ptr<FileContents> contents; + + // inputs + const ReaderOptions options; + const uint64_t fileLength; + const uint64_t postscriptLength; + + // footer + proto::Footer* footer; + uint64_t numberOfStripes; + uint64_t getMemoryUse(int stripeIx, std::vector<bool>& selectedColumns); + + // internal methods + void readMetadata() const; + void checkOrcVersion(); + void getRowIndexStatistics(const proto::StripeInformation& stripeInfo, uint64_t stripeIndex, + const proto::StripeFooter& currentStripeFooter, + std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const; + + // metadata + mutable std::unique_ptr<proto::Metadata> metadata; + mutable bool isMetadataLoaded; + public: + /** + * Constructor that lets the user specify additional options. + * @param contents of the file + * @param options options for reading + * @param fileLength the length of the file in bytes + * @param postscriptLength the length of the postscript in bytes + */ + ReaderImpl(std::shared_ptr<FileContents> contents, + const ReaderOptions& options, + uint64_t fileLength, + uint64_t postscriptLength); + + const ReaderOptions& getReaderOptions() const; + + CompressionKind getCompression() const override; + + FileVersion getFormatVersion() const override; + + WriterId getWriterId() const override; + + uint32_t getWriterIdValue() const override; + std::string getSoftwareVersion() const override; - WriterVersion getWriterVersion() const override; - - uint64_t getNumberOfRows() const override; - - uint64_t getRowIndexStride() const override; - - std::list<std::string> getMetadataKeys() const override; - - std::string getMetadataValue(const std::string& key) const override; - - bool hasMetadataValue(const std::string& key) const override; - - uint64_t getCompressionSize() const override; - - uint64_t getNumberOfStripes() const override; - - std::unique_ptr<StripeInformation> getStripe(uint64_t - ) const override; - - uint64_t getNumberOfStripeStatistics() const override; - - const std::string& getStreamName() const override; - - std::unique_ptr<StripeStatistics> - getStripeStatistics(uint64_t stripeIndex) const override; - - std::unique_ptr<RowReader> createRowReader() const override; - - std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options - ) const override; - - uint64_t getContentLength() const override; - uint64_t getStripeStatisticsLength() const override; - uint64_t getFileFooterLength() const override; - uint64_t getFilePostscriptLength() const override; - uint64_t getFileLength() const override; - - std::unique_ptr<Statistics> getStatistics() const override; - - std::unique_ptr<ColumnStatistics> 
getColumnStatistics(uint32_t columnId - ) const override; - - std::string getSerializedFileTail() const override; - - const Type& getType() const override; - - bool hasCorrectStatistics() const override; - - const proto::PostScript* getPostscript() const {return contents->postscript.get();} - - uint64_t getBlockSize() const {return contents->blockSize;} - - const proto::Footer* getFooter() const {return contents->footer.get();} - - const Type* getSchema() const {return contents->schema.get();} - - InputStream* getStream() const {return contents->stream.get();} - - uint64_t getMemoryUse(int stripeIx = -1) override; - - uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) override; - - uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) override; - - uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) override; - - std::map<uint32_t, BloomFilterIndex> - getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const override; - }; - -}// namespace - -#endif + WriterVersion getWriterVersion() const override; + + uint64_t getNumberOfRows() const override; + + uint64_t getRowIndexStride() const override; + + std::list<std::string> getMetadataKeys() const override; + + std::string getMetadataValue(const std::string& key) const override; + + bool hasMetadataValue(const std::string& key) const override; + + uint64_t getCompressionSize() const override; + + uint64_t getNumberOfStripes() const override; + + std::unique_ptr<StripeInformation> getStripe(uint64_t + ) const override; + + uint64_t getNumberOfStripeStatistics() const override; + + const std::string& getStreamName() const override; + + std::unique_ptr<StripeStatistics> + getStripeStatistics(uint64_t stripeIndex) const override; + + std::unique_ptr<RowReader> createRowReader() const override; + + std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options + ) const override; + + uint64_t getContentLength() const override; + uint64_t getStripeStatisticsLength() const override; + uint64_t getFileFooterLength() const override; + uint64_t getFilePostscriptLength() const override; + uint64_t getFileLength() const override; + + std::unique_ptr<Statistics> getStatistics() const override; + + std::unique_ptr<ColumnStatistics> getColumnStatistics(uint32_t columnId + ) const override; + + std::string getSerializedFileTail() const override; + + const Type& getType() const override; + + bool hasCorrectStatistics() const override; + + const proto::PostScript* getPostscript() const {return contents->postscript.get();} + + uint64_t getBlockSize() const {return contents->blockSize;} + + const proto::Footer* getFooter() const {return contents->footer.get();} + + const Type* getSchema() const {return contents->schema.get();} + + InputStream* getStream() const {return contents->stream.get();} + + uint64_t getMemoryUse(int stripeIx = -1) override; + + uint64_t getMemoryUseByFieldId(const std::list<uint64_t>& include, int stripeIx=-1) override; + + uint64_t getMemoryUseByName(const std::list<std::string>& names, int stripeIx=-1) override; + + uint64_t getMemoryUseByTypeId(const std::list<uint64_t>& include, int stripeIx=-1) override; + + std::map<uint32_t, BloomFilterIndex> + getBloomFilters(uint32_t stripeIndex, const std::set<uint32_t>& included) const override; + }; + +}// namespace + +#endif diff --git a/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc b/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc index 2b7acb0bd5..c5c6f6a801 100644 
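The RleDecoderV2.cc hunk that follows re-adds the RLEv2 decoder unchanged. Its readVulong and readVslong helpers implement the usual base-128 varint and zigzag decodings; the standalone sketch below shows the same idea over a plain byte buffer. The buffer/position pair is a hypothetical stand-in for the decoder's buffered readByte(), which actually pulls from a SeekableInputStream, so treat this as an illustration of the encoding rather than the decoder's real I/O path.

#include <cstdint>
#include <cstddef>

// Sketch: decode an unsigned base-128 varint ("vulong") from a byte buffer.
// Each byte contributes its low 7 bits; a set high bit means more bytes follow.
static uint64_t sketchReadVulong(const unsigned char* buf, size_t& pos) {
  uint64_t result = 0, b;
  uint64_t shift = 0;
  do {
    b = buf[pos++];
    result |= (b & 0x7f) << shift;
    shift += 7;
  } while (b >= 0x80);
  return result;
}

// Sketch: undo zigzag encoding, mapping 0, 1, 2, 3, ... back to 0, -1, 1, -2, ...
static int64_t sketchUnZigZag(uint64_t value) {
  return static_cast<int64_t>(value >> 1) ^ -static_cast<int64_t>(value & 1);
}

// Signed variant ("vslong") = varint + zigzag, mirroring RleDecoderV2::readVslong.
static int64_t sketchReadVslong(const unsigned char* buf, size_t& pos) {
  return sketchUnZigZag(sketchReadVulong(buf, pos));
}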
--- a/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc +++ b/contrib/libs/apache/orc/c++/src/RleDecoderV2.cc @@ -1,426 +1,426 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "Compression.hh" -#include "RLEv2.hh" -#include "RLEV2Util.hh" - -namespace orc { - -int64_t RleDecoderV2::readLongBE(uint64_t bsz) { - int64_t ret = 0, val; - uint64_t n = bsz; - while (n > 0) { - n--; - val = readByte(); - ret |= (val << (n * 8)); - } - return ret; -} - -inline int64_t RleDecoderV2::readVslong() { - return unZigZag(readVulong()); -} - -uint64_t RleDecoderV2::readVulong() { - uint64_t ret = 0, b; - uint64_t offset = 0; - do { - b = readByte(); - ret |= (0x7f & b) << offset; - offset += 7; - } while (b >= 0x80); - return ret; -} - -RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input, - bool _isSigned, MemoryPool& pool - ): inputStream(std::move(input)), - isSigned(_isSigned), - firstByte(0), - runLength(0), - runRead(0), - bufferStart(nullptr), - bufferEnd(bufferStart), - deltaBase(0), - byteSize(0), - firstValue(0), - prevValue(0), - bitSize(0), - bitsLeft(0), - curByte(0), - patchBitSize(0), - unpackedIdx(0), - patchIdx(0), - base(0), - curGap(0), - curPatch(0), - patchMask(0), - actualGap(0), - unpacked(pool, 0), - unpackedPatch(pool, 0) { - // PASS -} - -void RleDecoderV2::seek(PositionProvider& location) { - // move the input stream - inputStream->seek(location); - // clear state - bufferEnd = bufferStart = nullptr; - runRead = runLength = 0; - // skip ahead the given number of records - skip(location.next()); -} - -void RleDecoderV2::skip(uint64_t numValues) { - // simple for now, until perf tests indicate something encoding specific is - // needed - const uint64_t N = 64; - int64_t dummy[N]; - - while (numValues) { - uint64_t nRead = std::min(N, numValues); - next(dummy, nRead, nullptr); - numValues -= nRead; - } -} - -void RleDecoderV2::next(int64_t* const data, - const uint64_t numValues, - const char* const notNull) { - uint64_t nRead = 0; - - while (nRead < numValues) { - // Skip any nulls before attempting to read first byte. 
- while (notNull && !notNull[nRead]) { - if (++nRead == numValues) { - return; // ended with null values - } - } - - if (runRead == runLength) { - resetRun(); - firstByte = readByte(); - } - - uint64_t offset = nRead, length = numValues - nRead; - - EncodingType enc = static_cast<EncodingType> - ((firstByte >> 6) & 0x03); - switch(static_cast<int64_t>(enc)) { - case SHORT_REPEAT: - nRead += nextShortRepeats(data, offset, length, notNull); - break; - case DIRECT: - nRead += nextDirect(data, offset, length, notNull); - break; - case PATCHED_BASE: - nRead += nextPatched(data, offset, length, notNull); - break; - case DELTA: - nRead += nextDelta(data, offset, length, notNull); - break; - default: - throw ParseError("unknown encoding"); - } - } -} - -uint64_t RleDecoderV2::nextShortRepeats(int64_t* const data, - uint64_t offset, - uint64_t numValues, - const char* const notNull) { - if (runRead == runLength) { - // extract the number of fixed bytes - byteSize = (firstByte >> 3) & 0x07; - byteSize += 1; - - runLength = firstByte & 0x07; - // run lengths values are stored only after MIN_REPEAT value is met - runLength += MIN_REPEAT; - runRead = 0; - - // read the repeated value which is store using fixed bytes - firstValue = readLongBE(byteSize); - - if (isSigned) { - firstValue = unZigZag(static_cast<uint64_t>(firstValue)); - } - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - if (notNull) { - for(uint64_t pos = offset; pos < offset + nRead; ++pos) { - if (notNull[pos]) { - data[pos] = firstValue; - ++runRead; - } - } - } else { - for(uint64_t pos = offset; pos < offset + nRead; ++pos) { - data[pos] = firstValue; - ++runRead; - } - } - - return nRead; -} - -uint64_t RleDecoderV2::nextDirect(int64_t* const data, - uint64_t offset, - uint64_t numValues, - const char* const notNull) { - if (runRead == runLength) { - // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; - bitSize = decodeBitWidth(fbo); - - // extract the run length - runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; - runLength |= readByte(); - // runs are one off - runLength += 1; - runRead = 0; - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - runRead += readLongs(data, offset, nRead, bitSize, notNull); - - if (isSigned) { - if (notNull) { - for (uint64_t pos = offset; pos < offset + nRead; ++pos) { - if (notNull[pos]) { - data[pos] = unZigZag(static_cast<uint64_t>(data[pos])); - } - } - } else { - for (uint64_t pos = offset; pos < offset + nRead; ++pos) { - data[pos] = unZigZag(static_cast<uint64_t>(data[pos])); - } - } - } - - return nRead; -} - -uint64_t RleDecoderV2::nextPatched(int64_t* const data, - uint64_t offset, - uint64_t numValues, - const char* const notNull) { - if (runRead == runLength) { - // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; - bitSize = decodeBitWidth(fbo); - - // extract the run length - runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; - runLength |= readByte(); - // runs are one off - runLength += 1; - runRead = 0; - - // extract the number of bytes occupied by base - uint64_t thirdByte = readByte(); - byteSize = (thirdByte >> 5) & 0x07; - // base width is one off - byteSize += 1; - - // extract patch width - uint32_t pwo = thirdByte & 0x1f; - patchBitSize = decodeBitWidth(pwo); - - // read fourth byte and extract patch gap width - uint64_t fourthByte = readByte(); - uint32_t pgw = (fourthByte >> 5) & 0x07; - // patch gap width is one off - pgw += 1; - - // extract the length of 
the patch list - size_t pl = fourthByte & 0x1f; - if (pl == 0) { - throw ParseError("Corrupt PATCHED_BASE encoded data (pl==0)!"); - } - - // read the next base width number of bytes to extract base value - base = readLongBE(byteSize); - int64_t mask = (static_cast<int64_t>(1) << ((byteSize * 8) - 1)); - // if mask of base value is 1 then base is negative value else positive - if ((base & mask) != 0) { - base = base & ~mask; - base = -base; - } - - // TODO: something more efficient than resize - unpacked.resize(runLength); - unpackedIdx = 0; - readLongs(unpacked.data(), 0, runLength, bitSize); - // any remaining bits are thrown out - resetReadLongs(); - - // TODO: something more efficient than resize - unpackedPatch.resize(pl); - patchIdx = 0; - // TODO: Skip corrupt? - // if ((patchBitSize + pgw) > 64 && !skipCorrupt) { - if ((patchBitSize + pgw) > 64) { - throw ParseError("Corrupt PATCHED_BASE encoded data " - "(patchBitSize + pgw > 64)!"); - } - uint32_t cfb = getClosestFixedBits(patchBitSize + pgw); - readLongs(unpackedPatch.data(), 0, pl, cfb); - // any remaining bits are thrown out - resetReadLongs(); - - // apply the patch directly when decoding the packed data - patchMask = ((static_cast<int64_t>(1) << patchBitSize) - 1); - - adjustGapAndPatch(); - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - for(uint64_t pos = offset; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - if (static_cast<int64_t>(unpackedIdx) != actualGap) { - // no patching required. add base to unpacked value to get final value - data[pos] = base + unpacked[unpackedIdx]; - } else { - // extract the patch value - int64_t patchedVal = unpacked[unpackedIdx] | (curPatch << bitSize); - - // add base to patched value - data[pos] = base + patchedVal; - - // increment the patch to point to next entry in patch list - ++patchIdx; - - if (patchIdx < unpackedPatch.size()) { - adjustGapAndPatch(); - - // next gap is relative to the current gap - actualGap += unpackedIdx; - } - } - - ++runRead; - ++unpackedIdx; - } - - return nRead; -} - -uint64_t RleDecoderV2::nextDelta(int64_t* const data, - uint64_t offset, - uint64_t numValues, - const char* const notNull) { - if (runRead == runLength) { - // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; - if (fbo != 0) { - bitSize = decodeBitWidth(fbo); - } else { - bitSize = 0; - } - - // extract the run length - runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; - runLength |= readByte(); - ++runLength; // account for first value - runRead = deltaBase = 0; - - // read the first value stored as vint - if (isSigned) { - firstValue = static_cast<int64_t>(readVslong()); - } else { - firstValue = static_cast<int64_t>(readVulong()); - } - - prevValue = firstValue; - - // read the fixed delta value stored as vint (deltas can be negative even - // if all number are positive) - deltaBase = static_cast<int64_t>(readVslong()); - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - uint64_t pos = offset; - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead == 0 && pos < offset + nRead) { - data[pos++] = firstValue; - ++runRead; - } - - if (bitSize == 0) { - // add fixed deltas to adjacent values - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue + deltaBase; - ++runRead; - } - } else { - for ( ; 
pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead < 2 && pos < offset + nRead) { - // add delta base and first value - prevValue = data[pos++] = firstValue + deltaBase; - ++runRead; - } - - // write the unpacked values, add it to previous value and store final - // value to result buffer. if the delta base value is negative then it - // is a decreasing sequence else an increasing sequence - uint64_t remaining = (offset + nRead) - pos; - runRead += readLongs(data, pos, remaining, bitSize, notNull); - - if (deltaBase < 0) { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue - data[pos]; - } - } else { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue + data[pos]; - } - } - } - return nRead; -} - -} // namespace orc +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "RLEv2.hh" +#include "RLEV2Util.hh" + +namespace orc { + +int64_t RleDecoderV2::readLongBE(uint64_t bsz) { + int64_t ret = 0, val; + uint64_t n = bsz; + while (n > 0) { + n--; + val = readByte(); + ret |= (val << (n * 8)); + } + return ret; +} + +inline int64_t RleDecoderV2::readVslong() { + return unZigZag(readVulong()); +} + +uint64_t RleDecoderV2::readVulong() { + uint64_t ret = 0, b; + uint64_t offset = 0; + do { + b = readByte(); + ret |= (0x7f & b) << offset; + offset += 7; + } while (b >= 0x80); + return ret; +} + +RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input, + bool _isSigned, MemoryPool& pool + ): inputStream(std::move(input)), + isSigned(_isSigned), + firstByte(0), + runLength(0), + runRead(0), + bufferStart(nullptr), + bufferEnd(bufferStart), + deltaBase(0), + byteSize(0), + firstValue(0), + prevValue(0), + bitSize(0), + bitsLeft(0), + curByte(0), + patchBitSize(0), + unpackedIdx(0), + patchIdx(0), + base(0), + curGap(0), + curPatch(0), + patchMask(0), + actualGap(0), + unpacked(pool, 0), + unpackedPatch(pool, 0) { + // PASS +} + +void RleDecoderV2::seek(PositionProvider& location) { + // move the input stream + inputStream->seek(location); + // clear state + bufferEnd = bufferStart = nullptr; + runRead = runLength = 0; + // skip ahead the given number of records + skip(location.next()); +} + +void RleDecoderV2::skip(uint64_t numValues) { + // simple for now, until perf tests indicate something encoding specific is + // needed + const uint64_t N = 64; + int64_t dummy[N]; + + while (numValues) { + uint64_t nRead = std::min(N, numValues); + next(dummy, nRead, nullptr); + numValues -= nRead; + } +} + +void RleDecoderV2::next(int64_t* const data, + const uint64_t numValues, + const char* const notNull) { + uint64_t nRead = 0; + + while (nRead < numValues) { + // Skip any nulls before attempting to read first byte. 
+ while (notNull && !notNull[nRead]) { + if (++nRead == numValues) { + return; // ended with null values + } + } + + if (runRead == runLength) { + resetRun(); + firstByte = readByte(); + } + + uint64_t offset = nRead, length = numValues - nRead; + + EncodingType enc = static_cast<EncodingType> + ((firstByte >> 6) & 0x03); + switch(static_cast<int64_t>(enc)) { + case SHORT_REPEAT: + nRead += nextShortRepeats(data, offset, length, notNull); + break; + case DIRECT: + nRead += nextDirect(data, offset, length, notNull); + break; + case PATCHED_BASE: + nRead += nextPatched(data, offset, length, notNull); + break; + case DELTA: + nRead += nextDelta(data, offset, length, notNull); + break; + default: + throw ParseError("unknown encoding"); + } + } +} + +uint64_t RleDecoderV2::nextShortRepeats(int64_t* const data, + uint64_t offset, + uint64_t numValues, + const char* const notNull) { + if (runRead == runLength) { + // extract the number of fixed bytes + byteSize = (firstByte >> 3) & 0x07; + byteSize += 1; + + runLength = firstByte & 0x07; + // run lengths values are stored only after MIN_REPEAT value is met + runLength += MIN_REPEAT; + runRead = 0; + + // read the repeated value which is store using fixed bytes + firstValue = readLongBE(byteSize); + + if (isSigned) { + firstValue = unZigZag(static_cast<uint64_t>(firstValue)); + } + } + + uint64_t nRead = std::min(runLength - runRead, numValues); + + if (notNull) { + for(uint64_t pos = offset; pos < offset + nRead; ++pos) { + if (notNull[pos]) { + data[pos] = firstValue; + ++runRead; + } + } + } else { + for(uint64_t pos = offset; pos < offset + nRead; ++pos) { + data[pos] = firstValue; + ++runRead; + } + } + + return nRead; +} + +uint64_t RleDecoderV2::nextDirect(int64_t* const data, + uint64_t offset, + uint64_t numValues, + const char* const notNull) { + if (runRead == runLength) { + // extract the number of fixed bits + unsigned char fbo = (firstByte >> 1) & 0x1f; + bitSize = decodeBitWidth(fbo); + + // extract the run length + runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; + runLength |= readByte(); + // runs are one off + runLength += 1; + runRead = 0; + } + + uint64_t nRead = std::min(runLength - runRead, numValues); + + runRead += readLongs(data, offset, nRead, bitSize, notNull); + + if (isSigned) { + if (notNull) { + for (uint64_t pos = offset; pos < offset + nRead; ++pos) { + if (notNull[pos]) { + data[pos] = unZigZag(static_cast<uint64_t>(data[pos])); + } + } + } else { + for (uint64_t pos = offset; pos < offset + nRead; ++pos) { + data[pos] = unZigZag(static_cast<uint64_t>(data[pos])); + } + } + } + + return nRead; +} + +uint64_t RleDecoderV2::nextPatched(int64_t* const data, + uint64_t offset, + uint64_t numValues, + const char* const notNull) { + if (runRead == runLength) { + // extract the number of fixed bits + unsigned char fbo = (firstByte >> 1) & 0x1f; + bitSize = decodeBitWidth(fbo); + + // extract the run length + runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; + runLength |= readByte(); + // runs are one off + runLength += 1; + runRead = 0; + + // extract the number of bytes occupied by base + uint64_t thirdByte = readByte(); + byteSize = (thirdByte >> 5) & 0x07; + // base width is one off + byteSize += 1; + + // extract patch width + uint32_t pwo = thirdByte & 0x1f; + patchBitSize = decodeBitWidth(pwo); + + // read fourth byte and extract patch gap width + uint64_t fourthByte = readByte(); + uint32_t pgw = (fourthByte >> 5) & 0x07; + // patch gap width is one off + pgw += 1; + + // extract the length of 
the patch list + size_t pl = fourthByte & 0x1f; + if (pl == 0) { + throw ParseError("Corrupt PATCHED_BASE encoded data (pl==0)!"); + } + + // read the next base width number of bytes to extract base value + base = readLongBE(byteSize); + int64_t mask = (static_cast<int64_t>(1) << ((byteSize * 8) - 1)); + // if mask of base value is 1 then base is negative value else positive + if ((base & mask) != 0) { + base = base & ~mask; + base = -base; + } + + // TODO: something more efficient than resize + unpacked.resize(runLength); + unpackedIdx = 0; + readLongs(unpacked.data(), 0, runLength, bitSize); + // any remaining bits are thrown out + resetReadLongs(); + + // TODO: something more efficient than resize + unpackedPatch.resize(pl); + patchIdx = 0; + // TODO: Skip corrupt? + // if ((patchBitSize + pgw) > 64 && !skipCorrupt) { + if ((patchBitSize + pgw) > 64) { + throw ParseError("Corrupt PATCHED_BASE encoded data " + "(patchBitSize + pgw > 64)!"); + } + uint32_t cfb = getClosestFixedBits(patchBitSize + pgw); + readLongs(unpackedPatch.data(), 0, pl, cfb); + // any remaining bits are thrown out + resetReadLongs(); + + // apply the patch directly when decoding the packed data + patchMask = ((static_cast<int64_t>(1) << patchBitSize) - 1); + + adjustGapAndPatch(); + } + + uint64_t nRead = std::min(runLength - runRead, numValues); + + for(uint64_t pos = offset; pos < offset + nRead; ++pos) { + // skip null positions + if (notNull && !notNull[pos]) { + continue; + } + if (static_cast<int64_t>(unpackedIdx) != actualGap) { + // no patching required. add base to unpacked value to get final value + data[pos] = base + unpacked[unpackedIdx]; + } else { + // extract the patch value + int64_t patchedVal = unpacked[unpackedIdx] | (curPatch << bitSize); + + // add base to patched value + data[pos] = base + patchedVal; + + // increment the patch to point to next entry in patch list + ++patchIdx; + + if (patchIdx < unpackedPatch.size()) { + adjustGapAndPatch(); + + // next gap is relative to the current gap + actualGap += unpackedIdx; + } + } + + ++runRead; + ++unpackedIdx; + } + + return nRead; +} + +uint64_t RleDecoderV2::nextDelta(int64_t* const data, + uint64_t offset, + uint64_t numValues, + const char* const notNull) { + if (runRead == runLength) { + // extract the number of fixed bits + unsigned char fbo = (firstByte >> 1) & 0x1f; + if (fbo != 0) { + bitSize = decodeBitWidth(fbo); + } else { + bitSize = 0; + } + + // extract the run length + runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; + runLength |= readByte(); + ++runLength; // account for first value + runRead = deltaBase = 0; + + // read the first value stored as vint + if (isSigned) { + firstValue = static_cast<int64_t>(readVslong()); + } else { + firstValue = static_cast<int64_t>(readVulong()); + } + + prevValue = firstValue; + + // read the fixed delta value stored as vint (deltas can be negative even + // if all number are positive) + deltaBase = static_cast<int64_t>(readVslong()); + } + + uint64_t nRead = std::min(runLength - runRead, numValues); + + uint64_t pos = offset; + for ( ; pos < offset + nRead; ++pos) { + // skip null positions + if (!notNull || notNull[pos]) break; + } + if (runRead == 0 && pos < offset + nRead) { + data[pos++] = firstValue; + ++runRead; + } + + if (bitSize == 0) { + // add fixed deltas to adjacent values + for ( ; pos < offset + nRead; ++pos) { + // skip null positions + if (notNull && !notNull[pos]) { + continue; + } + prevValue = data[pos] = prevValue + deltaBase; + ++runRead; + } + } else { + for ( ; 
pos < offset + nRead; ++pos) { + // skip null positions + if (!notNull || notNull[pos]) break; + } + if (runRead < 2 && pos < offset + nRead) { + // add delta base and first value + prevValue = data[pos++] = firstValue + deltaBase; + ++runRead; + } + + // write the unpacked values, add it to previous value and store final + // value to result buffer. if the delta base value is negative then it + // is a decreasing sequence else an increasing sequence + uint64_t remaining = (offset + nRead) - pos; + runRead += readLongs(data, pos, remaining, bitSize, notNull); + + if (deltaBase < 0) { + for ( ; pos < offset + nRead; ++pos) { + // skip null positions + if (notNull && !notNull[pos]) { + continue; + } + prevValue = data[pos] = prevValue - data[pos]; + } + } else { + for ( ; pos < offset + nRead; ++pos) { + // skip null positions + if (notNull && !notNull[pos]) { + continue; + } + prevValue = data[pos] = prevValue + data[pos]; + } + } + } + return nRead; +} + +} // namespace orc diff --git a/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc b/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc index f77838a4dd..44e2761b74 100644 --- a/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc +++ b/contrib/libs/apache/orc/c++/src/RleEncoderV2.cc @@ -1,773 +1,773 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with option work for additional information - * regarding copyright ownership. The ASF licenses option file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use option file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "Compression.hh" -#include "RLEv2.hh" -#include "RLEV2Util.hh" - -#define MAX_LITERAL_SIZE 512 -#define MAX_SHORT_REPEAT_LENGTH 10 - -namespace orc { - -/** - * Compute the bits required to represent pth percentile value - * @param data - array - * @param p - percentile value (>=0.0 to <=1.0) - * @return pth percentile bits - */ -uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist) { - if ((p > 1.0) || (p <= 0.0)) { - throw InvalidArgument("Invalid p value: " + to_string(p)); - } - - if (!reuseHist) { - // histogram that store the encoded bit requirement for each values. 
- // maximum number of bits that can encoded is 32 (refer FixedBitSizes) - memset(histgram, 0, FixedBitSizes::SIZE * sizeof(int32_t)); - // compute the histogram - for(size_t i = offset; i < (offset + length); i++) { - uint32_t idx = encodeBitWidth(findClosestNumBits(data[i])); - histgram[idx] += 1; - } - } - - int32_t perLen = static_cast<int32_t>(static_cast<double>(length) * (1.0 - p)); - - // return the bits required by pth percentile length - for(int32_t i = HIST_LEN - 1; i >= 0; i--) { - perLen -= histgram[i]; - if (perLen < 0) { - return decodeBitWidth(static_cast<uint32_t>(i)); - } - } - return 0; -} - -RleEncoderV2::RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, - bool hasSigned, bool alignBitPacking) : - RleEncoder(std::move(outStream), hasSigned), - alignedBitPacking(alignBitPacking), - prevDelta(0){ - literals = new int64_t[MAX_LITERAL_SIZE]; - gapVsPatchList = new int64_t[MAX_LITERAL_SIZE]; - zigzagLiterals = new int64_t[MAX_LITERAL_SIZE]; - baseRedLiterals = new int64_t[MAX_LITERAL_SIZE]; - adjDeltas = new int64_t[MAX_LITERAL_SIZE]; -} - -void RleEncoderV2::write(int64_t val) { - if(numLiterals == 0) { - initializeLiterals(val); - return; - } - - if(numLiterals == 1) { - prevDelta = val - literals[0]; - literals[numLiterals++] = val; - - if(val == literals[0]) { - fixedRunLength = 2; - variableRunLength = 0; - } else { - fixedRunLength = 0; - variableRunLength = 2; - } - return; - } - - int64_t currentDelta = val - literals[numLiterals - 1]; - EncodingOption option = {}; - if (prevDelta == 0 && currentDelta == 0) { - // case 1: fixed delta run - literals[numLiterals++] = val; - - if (variableRunLength > 0) { - // if variable run is non-zero then we are seeing repeating - // values at the end of variable run in which case fixed Run - // length is 2 - fixedRunLength = 2; - } - fixedRunLength++; - - // if fixed run met the minimum condition and if variable - // run is non-zero then flush the variable run and shift the - // tail fixed runs to start of the buffer - if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) { - numLiterals -= MIN_REPEAT; - variableRunLength -= (MIN_REPEAT - 1); - - determineEncoding(option); - writeValues(option); - - // shift tail fixed runs to beginning of the buffer - for (size_t i = 0; i < MIN_REPEAT; ++i) { - literals[i] = val; - } - numLiterals = MIN_REPEAT; - } - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with option work for additional information + * regarding copyright ownership. The ASF licenses option file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use option file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "RLEv2.hh" +#include "RLEV2Util.hh" + +#define MAX_LITERAL_SIZE 512 +#define MAX_SHORT_REPEAT_LENGTH 10 + +namespace orc { + +/** + * Compute the bits required to represent pth percentile value + * @param data - array + * @param p - percentile value (>=0.0 to <=1.0) + * @return pth percentile bits + */ +uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist) { + if ((p > 1.0) || (p <= 0.0)) { + throw InvalidArgument("Invalid p value: " + to_string(p)); + } + + if (!reuseHist) { + // histogram that store the encoded bit requirement for each values. + // maximum number of bits that can encoded is 32 (refer FixedBitSizes) + memset(histgram, 0, FixedBitSizes::SIZE * sizeof(int32_t)); + // compute the histogram + for(size_t i = offset; i < (offset + length); i++) { + uint32_t idx = encodeBitWidth(findClosestNumBits(data[i])); + histgram[idx] += 1; + } + } + + int32_t perLen = static_cast<int32_t>(static_cast<double>(length) * (1.0 - p)); + + // return the bits required by pth percentile length + for(int32_t i = HIST_LEN - 1; i >= 0; i--) { + perLen -= histgram[i]; + if (perLen < 0) { + return decodeBitWidth(static_cast<uint32_t>(i)); + } + } + return 0; +} + +RleEncoderV2::RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, + bool hasSigned, bool alignBitPacking) : + RleEncoder(std::move(outStream), hasSigned), + alignedBitPacking(alignBitPacking), + prevDelta(0){ + literals = new int64_t[MAX_LITERAL_SIZE]; + gapVsPatchList = new int64_t[MAX_LITERAL_SIZE]; + zigzagLiterals = new int64_t[MAX_LITERAL_SIZE]; + baseRedLiterals = new int64_t[MAX_LITERAL_SIZE]; + adjDeltas = new int64_t[MAX_LITERAL_SIZE]; +} + +void RleEncoderV2::write(int64_t val) { + if(numLiterals == 0) { + initializeLiterals(val); + return; + } + + if(numLiterals == 1) { + prevDelta = val - literals[0]; + literals[numLiterals++] = val; + + if(val == literals[0]) { + fixedRunLength = 2; + variableRunLength = 0; + } else { + fixedRunLength = 0; + variableRunLength = 2; + } + return; + } + + int64_t currentDelta = val - literals[numLiterals - 1]; + EncodingOption option = {}; + if (prevDelta == 0 && currentDelta == 0) { + // case 1: fixed delta run + literals[numLiterals++] = val; + + if (variableRunLength > 0) { + // if variable run is non-zero then we are seeing repeating + // values at the end of variable run in which case fixed Run + // length is 2 + fixedRunLength = 2; + } + fixedRunLength++; + + // if fixed run met the minimum condition and if variable + // run is non-zero then flush the variable run and shift the + // tail fixed runs to start of the buffer + if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) { + numLiterals -= MIN_REPEAT; + variableRunLength -= (MIN_REPEAT - 1); + + determineEncoding(option); + writeValues(option); + + // shift tail fixed runs to beginning of the buffer + for (size_t i = 0; i < MIN_REPEAT; ++i) { + literals[i] = val; + } + numLiterals = MIN_REPEAT; + } + if (fixedRunLength == MAX_LITERAL_SIZE) { - determineEncoding(option); - writeValues(option); - } - return; - } - - // case 2: variable delta run - - // if fixed run length is non-zero and if it satisfies the - // short repeat conditions then write the values as short repeats - // else use delta encoding - if (fixedRunLength >= MIN_REPEAT) { - if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { - option.encoding = SHORT_REPEAT; - } else { - option.encoding = DELTA; - option.isFixedDelta = true; - } - 
writeValues(option); - } - - // if fixed run length is <MIN_REPEAT and current value is - // different from previous then treat it as variable run - if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT && val != literals[numLiterals - 1]) { - variableRunLength = fixedRunLength; - fixedRunLength = 0; - } - - // after writing values re-initialize the variables - if (numLiterals == 0) { - initializeLiterals(val); - } else { - prevDelta = val - literals[numLiterals - 1]; - literals[numLiterals++] = val; - variableRunLength++; - - if (variableRunLength == MAX_LITERAL_SIZE) { - determineEncoding(option); - writeValues(option); - } - } -} - -void RleEncoderV2::computeZigZagLiterals(EncodingOption &option) { - int64_t zzEncVal = 0; - for (size_t i = 0; i < numLiterals; i++) { - if (isSigned) { - zzEncVal = zigZag(literals[i]); - } else { - zzEncVal = literals[i]; - } - zigzagLiterals[option.zigzagLiteralsCount++] = zzEncVal; - } -} - -void RleEncoderV2::preparePatchedBlob(EncodingOption& option) { - // mask will be max value beyond which patch will be generated - int64_t mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1; - - // since we are considering only 95 percentile, the size of gap and - // patch array can contain only be 5% values - option.patchLength = static_cast<uint32_t>(std::ceil((numLiterals / 20))); - - // #bit for patch - option.patchWidth = option.brBits100p - option.brBits95p; - option.patchWidth = getClosestFixedBits(option.patchWidth); - - // if patch bit requirement is 64 then it will not possible to pack - // gap and patch together in a long. To make sure gap and patch can be - // packed together adjust the patch width - if (option.patchWidth == 64) { - option.patchWidth = 56; - option.brBits95p = 8; - mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1; - } - - uint32_t gapIdx = 0; - uint32_t patchIdx = 0; - size_t prev = 0; - size_t maxGap = 0; - - std::vector<int64_t> gapList; - std::vector<int64_t> patchList; - - for(size_t i = 0; i < numLiterals; i++) { - // if value is above mask then create the patch and record the gap - if (baseRedLiterals[i] > mask) { - size_t gap = i - prev; - if (gap > maxGap) { - maxGap = gap; - } - - // gaps are relative, so store the previous patched value index - prev = i; - gapList.push_back(static_cast<int64_t>(gap)); - gapIdx++; - - // extract the most significant bits that are over mask bits - int64_t patch = baseRedLiterals[i] >> option.brBits95p; - patchList.push_back(patch); - patchIdx++; - - // strip off the MSB to enable safe bit packing - baseRedLiterals[i] &= mask; - } - } - - // adjust the patch length to number of entries in gap list - option.patchLength = gapIdx; - - // if the element to be patched is the first and only element then - // max gap will be 0, but to store the gap as 0 we need atleast 1 bit - if (maxGap == 0 && option.patchLength != 0) { - option.patchGapWidth = 1; - } else { - option.patchGapWidth = findClosestNumBits(static_cast<int64_t>(maxGap)); - } - - // special case: if the patch gap width is greater than 256, then - // we need 9 bits to encode the gap width. But we only have 3 bits in - // header to record the gap width. To deal with this case, we will save - // two entries in patch list in the following way - // 256 gap width => 0 for patch value - // actual gap - 256 => actual patch value - // We will do the same for gap width = 511. If the element to be patched is - // the last element in the scope then gap width will be 511. 
In this case we - // will have 3 entries in the patch list in the following way - // 255 gap width => 0 for patch value - // 255 gap width => 0 for patch value - // 1 gap width => actual patch value - if (option.patchGapWidth > 8) { - option.patchGapWidth = 8; - // for gap = 511, we need two additional entries in patch list - if (maxGap == 511) { - option.patchLength += 2; - } else { - option.patchLength += 1; - } - } - - // create gap vs patch list - gapIdx = 0; - patchIdx = 0; - for(size_t i = 0; i < option.patchLength; i++) { - int64_t g = gapList[gapIdx++]; - int64_t p = patchList[patchIdx++]; - while (g > 255) { - gapVsPatchList[option.gapVsPatchListCount++] = (255L << option.patchWidth); - i++; - g -= 255; - } - - // store patch value in LSBs and gap in MSBs - gapVsPatchList[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p); - } -} - -void RleEncoderV2::determineEncoding(EncodingOption& option) { - // We need to compute zigzag values for DIRECT and PATCHED_BASE encodings, - // but not for SHORT_REPEAT or DELTA. So we only perform the zigzag - // computation when it's determined to be necessary. - - // not a big win for shorter runs to determine encoding - if (numLiterals <= MIN_REPEAT) { - // we need to compute zigzag values for DIRECT encoding if we decide to - // break early for delta overflows or for shorter runs - computeZigZagLiterals(option); - option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); - option.encoding = DIRECT; - return; - } - - // DELTA encoding check - - // for identifying monotonic sequences - bool isIncreasing = true; - bool isDecreasing = true; - option.isFixedDelta = true; - - option.min = literals[0]; - int64_t max = literals[0]; - int64_t initialDelta = literals[1] - literals[0]; - int64_t currDelta = 0; - int64_t deltaMax = 0; - adjDeltas[option.adjDeltasCount++] = initialDelta; - - for (size_t i = 1; i < numLiterals; i++) { - const int64_t l1 = literals[i]; - const int64_t l0 = literals[i - 1]; - currDelta = l1 - l0; - option.min = std::min(option.min, l1); - max = std::max(max, l1); - - isIncreasing &= (l0 <= l1); - isDecreasing &= (l0 >= l1); - - option.isFixedDelta &= (currDelta == initialDelta); - if (i > 1) { - adjDeltas[option.adjDeltasCount++] = std::abs(currDelta); - deltaMax = std::max(deltaMax, adjDeltas[i - 1]); - } - } - - // it's faster to exit under delta overflow condition without checking for - // PATCHED_BASE condition as encoding using DIRECT is faster and has less - // overhead than PATCHED_BASE - if (!isSafeSubtract(max, option.min)) { - computeZigZagLiterals(option); - option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); - option.encoding = DIRECT; - return; - } - - // invariant - subtracting any number from any other in the literals after - // option point won't overflow - - // if min is equal to max then the delta is 0, option condition happens for - // fixed values run >10 which cannot be encoded with SHORT_REPEAT - if (option.min == max) { - if (!option.isFixedDelta) { - throw InvalidArgument(to_string(option.min) + "==" + - to_string(max) + ", isFixedDelta cannot be false"); - } - - if(currDelta != 0) { - throw InvalidArgument(to_string(option.min) + "==" + - to_string(max) + ", currDelta should be zero"); - } - option.fixedDelta = 0; - option.encoding = DELTA; - return; - } - - if (option.isFixedDelta) { - if (currDelta != initialDelta) { - throw InvalidArgument("currDelta should be equal to initialDelta for fixed delta encoding"); - } - - option.encoding = DELTA; - 
option.fixedDelta = currDelta; - return; - } - - // if initialDelta is 0 then we cannot delta encode as we cannot identify - // the sign of deltas (increasing or decreasing) - if (initialDelta != 0) { - // stores the number of bits required for packing delta blob in - // delta encoding - option.bitsDeltaMax = findClosestNumBits(deltaMax); - - // monotonic condition - if (isIncreasing || isDecreasing) { - option.encoding = DELTA; - return; - } - } - - // PATCHED_BASE encoding check - - // percentile values are computed for the zigzag encoded values. if the - // number of bit requirement between 90th and 100th percentile varies - // beyond a threshold then we need to patch the values. if the variation - // is not significant then we can use direct encoding - - computeZigZagLiterals(option); - option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); - option.zzBits90p = percentileBits(zigzagLiterals, 0, numLiterals, 0.9, true); - uint32_t diffBitsLH = option.zzBits100p - option.zzBits90p; - - // if the difference between 90th percentile and 100th percentile fixed - // bits is > 1 then we need patch the values - if (diffBitsLH > 1) { - - // patching is done only on base reduced values. - // remove base from literals - for (size_t i = 0; i < numLiterals; i++) { - baseRedLiterals[option.baseRedLiteralsCount++] = (literals[i] - option.min); - } - - // 95th percentile width is used to determine max allowed value - // after which patching will be done - option.brBits95p = percentileBits(baseRedLiterals, 0, numLiterals, 0.95); - - // 100th percentile is used to compute the max patch width - option.brBits100p = percentileBits(baseRedLiterals, 0, numLiterals, 1.0, true); - - // after base reducing the values, if the difference in bits between - // 95th percentile and 100th percentile value is zero then there - // is no point in patching the values, in which case we will - // fallback to DIRECT encoding. - // The decision to use patched base was based on zigzag values, but the - // actual patching is done on base reduced literals. - if ((option.brBits100p - option.brBits95p) != 0) { - option.encoding = PATCHED_BASE; - preparePatchedBlob(option); - return; - } else { - option.encoding = DIRECT; - return; - } - } else { - // if difference in bits between 95th percentile and 100th percentile is - // 0, then patch length will become 0. 
Hence we will fallback to direct - option.encoding = DIRECT; - return; - } -} - -uint64_t RleEncoderV2::flush() { - if (numLiterals != 0) { - EncodingOption option = {}; - if (variableRunLength != 0) { - determineEncoding(option); - writeValues(option); - } else if (fixedRunLength != 0) { - if (fixedRunLength < MIN_REPEAT) { - variableRunLength = fixedRunLength; - fixedRunLength = 0; - determineEncoding(option); - writeValues(option); - } else if (fixedRunLength >= MIN_REPEAT - && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { - option.encoding = SHORT_REPEAT; - writeValues(option); - } else { - option.encoding = DELTA; - option.isFixedDelta = true; - writeValues(option); - } - } - } - - outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition)); - uint64_t dataSize = outputStream->flush(); - bufferLength = bufferPosition = 0; - return dataSize; -} - -void RleEncoderV2::writeValues(EncodingOption& option) { - if (numLiterals != 0) { - switch (option.encoding) { - case SHORT_REPEAT: - writeShortRepeatValues(option); - break; - case DIRECT: - writeDirectValues(option); - break; - case PATCHED_BASE: - writePatchedBasedValues(option); - break; - case DELTA: - writeDeltaValues(option); - break; - default: - throw NotImplementedYet("Not implemented yet"); - } - - numLiterals = 0; - prevDelta = 0; - } -} - -void RleEncoderV2::writeShortRepeatValues(EncodingOption&) { - int64_t repeatVal; - if (isSigned) { - repeatVal = zigZag(literals[0]); - } else { - repeatVal = literals[0]; - } - - const uint32_t numBitsRepeatVal = findClosestNumBits(repeatVal); - const uint32_t numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? (numBitsRepeatVal >> 3) : ((numBitsRepeatVal >> 3) + 1); - - uint32_t header = getOpCode(SHORT_REPEAT); - - fixedRunLength -= MIN_REPEAT; - header |= fixedRunLength; - header |= ((numBytesRepeatVal - 1) << 3); - - writeByte(static_cast<char>(header)); - - for(int32_t i = static_cast<int32_t>(numBytesRepeatVal - 1); i >= 0; i--) { - int64_t b = ((repeatVal >> (i * 8)) & 0xff); - writeByte(static_cast<char>(b)); - } - - fixedRunLength = 0; -} - -void RleEncoderV2::writeDirectValues(EncodingOption& option) { - // write the number of fixed bits required in next 5 bits - uint32_t fb = option.zzBits100p; - if (alignedBitPacking) { - fb = getClosestAlignedFixedBits(fb); - } - - const uint32_t efb = encodeBitWidth(fb) << 1; - - // adjust variable run length - variableRunLength -= 1; - - // extract the 9th bit of run length - const uint32_t tailBits = (variableRunLength & 0x100) >> 8; - - // create first byte of the header - const char headerFirstByte = static_cast<char>(getOpCode(DIRECT) | efb | tailBits); - - // second byte of the header stores the remaining 8 bits of runlength - const char headerSecondByte = static_cast<char>(variableRunLength & 0xff); - - // write header - writeByte(headerFirstByte); - writeByte(headerSecondByte); - - // bit packing the zigzag encoded literals - writeInts(zigzagLiterals, 0, numLiterals, fb); - - // reset run length - variableRunLength = 0; -} - -void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) { - // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding - // because patch is applied to MSB bits. For example: If fixed bit width of - // base value is 7 bits and if patch is 3 bits, the actual value is - // constructed by shifting the patch to left by 7 positions. - // actual_value = patch << 7 | base_value - // So, if we align base_value then actual_value can not be reconstructed. 
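As a quick illustration of the reconstruction described in the NOTE above, here is a hedged sketch with invented values: a 7-bit base-reduced width and a 3-bit patch, matching the example in the comment. The variable names and the concrete numbers are made up for the illustration.

#include <cstdint>
#include <cassert>

int main() {
  // Suppose base-reduced values are packed at 7 bits (brBits95p == 7) and the
  // outlier needs 3 more bits of patch, as in the NOTE above.
  const uint32_t fixedBits = 7;          // bit width used for packing
  const int64_t base = 1000;             // minimum of the run
  const int64_t original = 1900;         // an outlier: base-reduced value 900

  // Writer side: keep the low 7 bits in the packed stream, move the
  // remaining high bits into the patch list.
  int64_t reduced = original - base;                         // 900 = 0b1110000100
  int64_t packedLowBits = reduced & ((1 << fixedBits) - 1);  // 0b0000100 = 4
  int64_t patch = reduced >> fixedBits;                      // 0b111 = 7

  // Reader side (what nextPatched does): actual = patch << 7 | low bits,
  // then add the base back.
  int64_t reconstructed = base + ((patch << fixedBits) | packedLowBits);
  assert(reconstructed == original);

  // If the 7-bit values were re-packed at an aligned width (say 8 bits), the
  // shift amount would no longer match and the patch would land on the wrong
  // bits, which is why aligned packing is skipped for PATCHED_BASE.
  return 0;
}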
- - // write the number of fixed bits required in next 5 bits - const uint32_t efb = encodeBitWidth(option.brBits95p) << 1; - - // adjust variable run length, they are one off - variableRunLength -= 1; - - // extract the 9th bit of run length - const uint32_t tailBits = (variableRunLength & 0x100) >> 8; - - // create first byte of the header - const char headerFirstByte = static_cast<char>(getOpCode(PATCHED_BASE) | efb | tailBits); - - // second byte of the header stores the remaining 8 bits of runlength - const char headerSecondByte = static_cast<char>(variableRunLength & 0xff); - - // if the min value is negative toggle the sign - const bool isNegative = (option.min < 0); - if (isNegative) { - option.min = -option.min; - } - - // find the number of bytes required for base and shift it by 5 bits - // to accommodate patch width. The additional bit is used to store the sign - // of the base value. - const uint32_t baseWidth = findClosestNumBits(option.min) + 1; - const uint32_t baseBytes = baseWidth % 8 == 0 ? baseWidth / 8 : (baseWidth / 8) + 1; - const uint32_t bb = (baseBytes - 1) << 5; - - // if the base value is negative then set MSB to 1 - if (isNegative) { - option.min |= (1LL << ((baseBytes * 8) - 1)); - } - - // third byte contains 3 bits for number of bytes occupied by base - // and 5 bits for patchWidth - const char headerThirdByte = static_cast<char>(bb | encodeBitWidth(option.patchWidth)); - - // fourth byte contains 3 bits for page gap width and 5 bits for - // patch length - const char headerFourthByte = static_cast<char>((option.patchGapWidth - 1) << 5 | option.patchLength); - - // write header - writeByte(headerFirstByte); - writeByte(headerSecondByte); - writeByte(headerThirdByte); - writeByte(headerFourthByte); - - // write the base value using fixed bytes in big endian order - for(int32_t i = static_cast<int32_t>(baseBytes - 1); i >= 0; i--) { - char b = static_cast<char>(((option.min >> (i * 8)) & 0xff)); - writeByte(b); - } - - // base reduced literals are bit packed - uint32_t closestFixedBits = getClosestFixedBits(option.brBits95p); - - writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits); - - // write patch list - closestFixedBits = getClosestFixedBits(option.patchGapWidth + option.patchWidth); - - writeInts(gapVsPatchList, 0, option.patchLength, closestFixedBits); - - // reset run length - variableRunLength = 0; -} - -void RleEncoderV2::writeDeltaValues(EncodingOption& option) { - uint32_t len = 0; - uint32_t fb = option.bitsDeltaMax; - uint32_t efb = 0; - - if (alignedBitPacking) { - fb = getClosestAlignedFixedBits(fb); - } - - if (option.isFixedDelta) { - // if fixed run length is greater than threshold then it will be fixed - // delta sequence with delta value 0 else fixed delta sequence with - // non-zero delta value - if (fixedRunLength > MIN_REPEAT) { - // ex. sequence: 2 2 2 2 2 2 2 2 - len = fixedRunLength - 1; - fixedRunLength = 0; - } else { - // ex. sequence: 4 6 8 10 12 14 16 - len = variableRunLength - 1; - variableRunLength = 0; - } - } else { - // fixed width 0 is used for long repeating values. 
- // sequences that require only 1 bit to encode will have an additional bit - if (fb == 1) { - fb = 2; - } - efb = encodeBitWidth(fb) << 1; - len = variableRunLength - 1; - variableRunLength = 0; - } - - // extract the 9th bit of run length - const uint32_t tailBits = (len & 0x100) >> 8; - - // create first byte of the header - const char headerFirstByte = static_cast<char>(getOpCode(DELTA) | efb | tailBits); - - // second byte of the header stores the remaining 8 bits of runlength - const char headerSecondByte = static_cast<char>(len & 0xff); - - // write header - writeByte(headerFirstByte); - writeByte(headerSecondByte); - - // store the first value from zigzag literal array - if (isSigned) { - writeVslong(literals[0]); - } else { - writeVulong(literals[0]); - } - - if (option.isFixedDelta) { - // if delta is fixed then we don't need to store delta blob - writeVslong(option.fixedDelta); - } else { - // store the first value as delta value using zigzag encoding - writeVslong(adjDeltas[0]); - - // adjacent delta values are bit packed. The length of adjDeltas array is - // always one less than the number of literals (delta difference for n - // elements is n-1). We have already written one element, write the - // remaining numLiterals - 2 elements here - writeInts(adjDeltas, 1, numLiterals - 2, fb); - } -} - -void RleEncoderV2::writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize) { - if(input == nullptr || len < 1 || bitSize < 1) { - return; - } - - if (getClosestAlignedFixedBits(bitSize) == bitSize) { - uint32_t numBytes; - uint32_t endOffSet = static_cast<uint32_t>(offset + len); + determineEncoding(option); + writeValues(option); + } + return; + } + + // case 2: variable delta run + + // if fixed run length is non-zero and if it satisfies the + // short repeat conditions then write the values as short repeats + // else use delta encoding + if (fixedRunLength >= MIN_REPEAT) { + if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { + option.encoding = SHORT_REPEAT; + } else { + option.encoding = DELTA; + option.isFixedDelta = true; + } + writeValues(option); + } + + // if fixed run length is <MIN_REPEAT and current value is + // different from previous then treat it as variable run + if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT && val != literals[numLiterals - 1]) { + variableRunLength = fixedRunLength; + fixedRunLength = 0; + } + + // after writing values re-initialize the variables + if (numLiterals == 0) { + initializeLiterals(val); + } else { + prevDelta = val - literals[numLiterals - 1]; + literals[numLiterals++] = val; + variableRunLength++; + + if (variableRunLength == MAX_LITERAL_SIZE) { + determineEncoding(option); + writeValues(option); + } + } +} + +void RleEncoderV2::computeZigZagLiterals(EncodingOption &option) { + int64_t zzEncVal = 0; + for (size_t i = 0; i < numLiterals; i++) { + if (isSigned) { + zzEncVal = zigZag(literals[i]); + } else { + zzEncVal = literals[i]; + } + zigzagLiterals[option.zigzagLiteralsCount++] = zzEncVal; + } +} + +void RleEncoderV2::preparePatchedBlob(EncodingOption& option) { + // mask will be max value beyond which patch will be generated + int64_t mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1; + + // since we are considering only 95 percentile, the size of gap and + // patch array can contain only be 5% values + option.patchLength = static_cast<uint32_t>(std::ceil((numLiterals / 20))); + + // #bit for patch + option.patchWidth = option.brBits100p - option.brBits95p; + option.patchWidth = 
getClosestFixedBits(option.patchWidth); + + // if patch bit requirement is 64 then it will not possible to pack + // gap and patch together in a long. To make sure gap and patch can be + // packed together adjust the patch width + if (option.patchWidth == 64) { + option.patchWidth = 56; + option.brBits95p = 8; + mask = static_cast<int64_t>(static_cast<uint64_t>(1) << option.brBits95p) - 1; + } + + uint32_t gapIdx = 0; + uint32_t patchIdx = 0; + size_t prev = 0; + size_t maxGap = 0; + + std::vector<int64_t> gapList; + std::vector<int64_t> patchList; + + for(size_t i = 0; i < numLiterals; i++) { + // if value is above mask then create the patch and record the gap + if (baseRedLiterals[i] > mask) { + size_t gap = i - prev; + if (gap > maxGap) { + maxGap = gap; + } + + // gaps are relative, so store the previous patched value index + prev = i; + gapList.push_back(static_cast<int64_t>(gap)); + gapIdx++; + + // extract the most significant bits that are over mask bits + int64_t patch = baseRedLiterals[i] >> option.brBits95p; + patchList.push_back(patch); + patchIdx++; + + // strip off the MSB to enable safe bit packing + baseRedLiterals[i] &= mask; + } + } + + // adjust the patch length to number of entries in gap list + option.patchLength = gapIdx; + + // if the element to be patched is the first and only element then + // max gap will be 0, but to store the gap as 0 we need atleast 1 bit + if (maxGap == 0 && option.patchLength != 0) { + option.patchGapWidth = 1; + } else { + option.patchGapWidth = findClosestNumBits(static_cast<int64_t>(maxGap)); + } + + // special case: if the patch gap width is greater than 256, then + // we need 9 bits to encode the gap width. But we only have 3 bits in + // header to record the gap width. To deal with this case, we will save + // two entries in patch list in the following way + // 256 gap width => 0 for patch value + // actual gap - 256 => actual patch value + // We will do the same for gap width = 511. If the element to be patched is + // the last element in the scope then gap width will be 511. In this case we + // will have 3 entries in the patch list in the following way + // 255 gap width => 0 for patch value + // 255 gap width => 0 for patch value + // 1 gap width => actual patch value + if (option.patchGapWidth > 8) { + option.patchGapWidth = 8; + // for gap = 511, we need two additional entries in patch list + if (maxGap == 511) { + option.patchLength += 2; + } else { + option.patchLength += 1; + } + } + + // create gap vs patch list + gapIdx = 0; + patchIdx = 0; + for(size_t i = 0; i < option.patchLength; i++) { + int64_t g = gapList[gapIdx++]; + int64_t p = patchList[patchIdx++]; + while (g > 255) { + gapVsPatchList[option.gapVsPatchListCount++] = (255L << option.patchWidth); + i++; + g -= 255; + } + + // store patch value in LSBs and gap in MSBs + gapVsPatchList[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p); + } +} + +void RleEncoderV2::determineEncoding(EncodingOption& option) { + // We need to compute zigzag values for DIRECT and PATCHED_BASE encodings, + // but not for SHORT_REPEAT or DELTA. So we only perform the zigzag + // computation when it's determined to be necessary. 
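A minimal sketch of the zigzag transform referred to in the comment above. The function names are illustrative stand-ins; the real implementation lives in the ORC RLE helpers.

#include <cstdint>
#include <cassert>

// Zigzag maps small-magnitude signed values to small unsigned values:
// 0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ...  so small negative numbers stay
// narrow when bit packed for DIRECT / PATCHED_BASE.
static uint64_t zigZagSketch(int64_t value) {
  return (static_cast<uint64_t>(value) << 1) ^ static_cast<uint64_t>(value >> 63);
}

static int64_t unZigZagSketch(uint64_t value) {
  return static_cast<int64_t>(value >> 1) ^ -static_cast<int64_t>(value & 1);
}

int main() {
  assert(zigZagSketch(0) == 0);
  assert(zigZagSketch(-1) == 1);
  assert(zigZagSketch(1) == 2);
  assert(zigZagSketch(-64) == 127);   // still fits in 7 bits after zigzag
  for (int64_t v = -1000; v <= 1000; ++v) {
    assert(unZigZagSketch(zigZagSketch(v)) == v);
  }
  return 0;
}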
+ + // not a big win for shorter runs to determine encoding + if (numLiterals <= MIN_REPEAT) { + // we need to compute zigzag values for DIRECT encoding if we decide to + // break early for delta overflows or for shorter runs + computeZigZagLiterals(option); + option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); + option.encoding = DIRECT; + return; + } + + // DELTA encoding check + + // for identifying monotonic sequences + bool isIncreasing = true; + bool isDecreasing = true; + option.isFixedDelta = true; + + option.min = literals[0]; + int64_t max = literals[0]; + int64_t initialDelta = literals[1] - literals[0]; + int64_t currDelta = 0; + int64_t deltaMax = 0; + adjDeltas[option.adjDeltasCount++] = initialDelta; + + for (size_t i = 1; i < numLiterals; i++) { + const int64_t l1 = literals[i]; + const int64_t l0 = literals[i - 1]; + currDelta = l1 - l0; + option.min = std::min(option.min, l1); + max = std::max(max, l1); + + isIncreasing &= (l0 <= l1); + isDecreasing &= (l0 >= l1); + + option.isFixedDelta &= (currDelta == initialDelta); + if (i > 1) { + adjDeltas[option.adjDeltasCount++] = std::abs(currDelta); + deltaMax = std::max(deltaMax, adjDeltas[i - 1]); + } + } + + // it's faster to exit under delta overflow condition without checking for + // PATCHED_BASE condition as encoding using DIRECT is faster and has less + // overhead than PATCHED_BASE + if (!isSafeSubtract(max, option.min)) { + computeZigZagLiterals(option); + option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); + option.encoding = DIRECT; + return; + } + + // invariant - subtracting any number from any other in the literals after + // option point won't overflow + + // if min is equal to max then the delta is 0, option condition happens for + // fixed values run >10 which cannot be encoded with SHORT_REPEAT + if (option.min == max) { + if (!option.isFixedDelta) { + throw InvalidArgument(to_string(option.min) + "==" + + to_string(max) + ", isFixedDelta cannot be false"); + } + + if(currDelta != 0) { + throw InvalidArgument(to_string(option.min) + "==" + + to_string(max) + ", currDelta should be zero"); + } + option.fixedDelta = 0; + option.encoding = DELTA; + return; + } + + if (option.isFixedDelta) { + if (currDelta != initialDelta) { + throw InvalidArgument("currDelta should be equal to initialDelta for fixed delta encoding"); + } + + option.encoding = DELTA; + option.fixedDelta = currDelta; + return; + } + + // if initialDelta is 0 then we cannot delta encode as we cannot identify + // the sign of deltas (increasing or decreasing) + if (initialDelta != 0) { + // stores the number of bits required for packing delta blob in + // delta encoding + option.bitsDeltaMax = findClosestNumBits(deltaMax); + + // monotonic condition + if (isIncreasing || isDecreasing) { + option.encoding = DELTA; + return; + } + } + + // PATCHED_BASE encoding check + + // percentile values are computed for the zigzag encoded values. if the + // number of bit requirement between 90th and 100th percentile varies + // beyond a threshold then we need to patch the values. 
if the variation + // is not significant then we can use direct encoding + + computeZigZagLiterals(option); + option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0); + option.zzBits90p = percentileBits(zigzagLiterals, 0, numLiterals, 0.9, true); + uint32_t diffBitsLH = option.zzBits100p - option.zzBits90p; + + // if the difference between 90th percentile and 100th percentile fixed + // bits is > 1 then we need patch the values + if (diffBitsLH > 1) { + + // patching is done only on base reduced values. + // remove base from literals + for (size_t i = 0; i < numLiterals; i++) { + baseRedLiterals[option.baseRedLiteralsCount++] = (literals[i] - option.min); + } + + // 95th percentile width is used to determine max allowed value + // after which patching will be done + option.brBits95p = percentileBits(baseRedLiterals, 0, numLiterals, 0.95); + + // 100th percentile is used to compute the max patch width + option.brBits100p = percentileBits(baseRedLiterals, 0, numLiterals, 1.0, true); + + // after base reducing the values, if the difference in bits between + // 95th percentile and 100th percentile value is zero then there + // is no point in patching the values, in which case we will + // fallback to DIRECT encoding. + // The decision to use patched base was based on zigzag values, but the + // actual patching is done on base reduced literals. + if ((option.brBits100p - option.brBits95p) != 0) { + option.encoding = PATCHED_BASE; + preparePatchedBlob(option); + return; + } else { + option.encoding = DIRECT; + return; + } + } else { + // if difference in bits between 95th percentile and 100th percentile is + // 0, then patch length will become 0. Hence we will fallback to direct + option.encoding = DIRECT; + return; + } +} + +uint64_t RleEncoderV2::flush() { + if (numLiterals != 0) { + EncodingOption option = {}; + if (variableRunLength != 0) { + determineEncoding(option); + writeValues(option); + } else if (fixedRunLength != 0) { + if (fixedRunLength < MIN_REPEAT) { + variableRunLength = fixedRunLength; + fixedRunLength = 0; + determineEncoding(option); + writeValues(option); + } else if (fixedRunLength >= MIN_REPEAT + && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { + option.encoding = SHORT_REPEAT; + writeValues(option); + } else { + option.encoding = DELTA; + option.isFixedDelta = true; + writeValues(option); + } + } + } + + outputStream->BackUp(static_cast<int>(bufferLength - bufferPosition)); + uint64_t dataSize = outputStream->flush(); + bufferLength = bufferPosition = 0; + return dataSize; +} + +void RleEncoderV2::writeValues(EncodingOption& option) { + if (numLiterals != 0) { + switch (option.encoding) { + case SHORT_REPEAT: + writeShortRepeatValues(option); + break; + case DIRECT: + writeDirectValues(option); + break; + case PATCHED_BASE: + writePatchedBasedValues(option); + break; + case DELTA: + writeDeltaValues(option); + break; + default: + throw NotImplementedYet("Not implemented yet"); + } + + numLiterals = 0; + prevDelta = 0; + } +} + +void RleEncoderV2::writeShortRepeatValues(EncodingOption&) { + int64_t repeatVal; + if (isSigned) { + repeatVal = zigZag(literals[0]); + } else { + repeatVal = literals[0]; + } + + const uint32_t numBitsRepeatVal = findClosestNumBits(repeatVal); + const uint32_t numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? 
(numBitsRepeatVal >> 3) : ((numBitsRepeatVal >> 3) + 1); + + uint32_t header = getOpCode(SHORT_REPEAT); + + fixedRunLength -= MIN_REPEAT; + header |= fixedRunLength; + header |= ((numBytesRepeatVal - 1) << 3); + + writeByte(static_cast<char>(header)); + + for(int32_t i = static_cast<int32_t>(numBytesRepeatVal - 1); i >= 0; i--) { + int64_t b = ((repeatVal >> (i * 8)) & 0xff); + writeByte(static_cast<char>(b)); + } + + fixedRunLength = 0; +} + +void RleEncoderV2::writeDirectValues(EncodingOption& option) { + // write the number of fixed bits required in next 5 bits + uint32_t fb = option.zzBits100p; + if (alignedBitPacking) { + fb = getClosestAlignedFixedBits(fb); + } + + const uint32_t efb = encodeBitWidth(fb) << 1; + + // adjust variable run length + variableRunLength -= 1; + + // extract the 9th bit of run length + const uint32_t tailBits = (variableRunLength & 0x100) >> 8; + + // create first byte of the header + const char headerFirstByte = static_cast<char>(getOpCode(DIRECT) | efb | tailBits); + + // second byte of the header stores the remaining 8 bits of runlength + const char headerSecondByte = static_cast<char>(variableRunLength & 0xff); + + // write header + writeByte(headerFirstByte); + writeByte(headerSecondByte); + + // bit packing the zigzag encoded literals + writeInts(zigzagLiterals, 0, numLiterals, fb); + + // reset run length + variableRunLength = 0; +} + +void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) { + // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding + // because patch is applied to MSB bits. For example: If fixed bit width of + // base value is 7 bits and if patch is 3 bits, the actual value is + // constructed by shifting the patch to left by 7 positions. + // actual_value = patch << 7 | base_value + // So, if we align base_value then actual_value can not be reconstructed. + + // write the number of fixed bits required in next 5 bits + const uint32_t efb = encodeBitWidth(option.brBits95p) << 1; + + // adjust variable run length, they are one off + variableRunLength -= 1; + + // extract the 9th bit of run length + const uint32_t tailBits = (variableRunLength & 0x100) >> 8; + + // create first byte of the header + const char headerFirstByte = static_cast<char>(getOpCode(PATCHED_BASE) | efb | tailBits); + + // second byte of the header stores the remaining 8 bits of runlength + const char headerSecondByte = static_cast<char>(variableRunLength & 0xff); + + // if the min value is negative toggle the sign + const bool isNegative = (option.min < 0); + if (isNegative) { + option.min = -option.min; + } + + // find the number of bytes required for base and shift it by 5 bits + // to accommodate patch width. The additional bit is used to store the sign + // of the base value. + const uint32_t baseWidth = findClosestNumBits(option.min) + 1; + const uint32_t baseBytes = baseWidth % 8 == 0 ? 
baseWidth / 8 : (baseWidth / 8) + 1; + const uint32_t bb = (baseBytes - 1) << 5; + + // if the base value is negative then set MSB to 1 + if (isNegative) { + option.min |= (1LL << ((baseBytes * 8) - 1)); + } + + // third byte contains 3 bits for number of bytes occupied by base + // and 5 bits for patchWidth + const char headerThirdByte = static_cast<char>(bb | encodeBitWidth(option.patchWidth)); + + // fourth byte contains 3 bits for page gap width and 5 bits for + // patch length + const char headerFourthByte = static_cast<char>((option.patchGapWidth - 1) << 5 | option.patchLength); + + // write header + writeByte(headerFirstByte); + writeByte(headerSecondByte); + writeByte(headerThirdByte); + writeByte(headerFourthByte); + + // write the base value using fixed bytes in big endian order + for(int32_t i = static_cast<int32_t>(baseBytes - 1); i >= 0; i--) { + char b = static_cast<char>(((option.min >> (i * 8)) & 0xff)); + writeByte(b); + } + + // base reduced literals are bit packed + uint32_t closestFixedBits = getClosestFixedBits(option.brBits95p); + + writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits); + + // write patch list + closestFixedBits = getClosestFixedBits(option.patchGapWidth + option.patchWidth); + + writeInts(gapVsPatchList, 0, option.patchLength, closestFixedBits); + + // reset run length + variableRunLength = 0; +} + +void RleEncoderV2::writeDeltaValues(EncodingOption& option) { + uint32_t len = 0; + uint32_t fb = option.bitsDeltaMax; + uint32_t efb = 0; + + if (alignedBitPacking) { + fb = getClosestAlignedFixedBits(fb); + } + + if (option.isFixedDelta) { + // if fixed run length is greater than threshold then it will be fixed + // delta sequence with delta value 0 else fixed delta sequence with + // non-zero delta value + if (fixedRunLength > MIN_REPEAT) { + // ex. sequence: 2 2 2 2 2 2 2 2 + len = fixedRunLength - 1; + fixedRunLength = 0; + } else { + // ex. sequence: 4 6 8 10 12 14 16 + len = variableRunLength - 1; + variableRunLength = 0; + } + } else { + // fixed width 0 is used for long repeating values. + // sequences that require only 1 bit to encode will have an additional bit + if (fb == 1) { + fb = 2; + } + efb = encodeBitWidth(fb) << 1; + len = variableRunLength - 1; + variableRunLength = 0; + } + + // extract the 9th bit of run length + const uint32_t tailBits = (len & 0x100) >> 8; + + // create first byte of the header + const char headerFirstByte = static_cast<char>(getOpCode(DELTA) | efb | tailBits); + + // second byte of the header stores the remaining 8 bits of runlength + const char headerSecondByte = static_cast<char>(len & 0xff); + + // write header + writeByte(headerFirstByte); + writeByte(headerSecondByte); + + // store the first value from zigzag literal array + if (isSigned) { + writeVslong(literals[0]); + } else { + writeVulong(literals[0]); + } + + if (option.isFixedDelta) { + // if delta is fixed then we don't need to store delta blob + writeVslong(option.fixedDelta); + } else { + // store the first value as delta value using zigzag encoding + writeVslong(adjDeltas[0]); + + // adjacent delta values are bit packed. The length of adjDeltas array is + // always one less than the number of literals (delta difference for n + // elements is n-1). 
We have already written one element, write the + // remaining numLiterals - 2 elements here + writeInts(adjDeltas, 1, numLiterals - 2, fb); + } +} + +void RleEncoderV2::writeInts(int64_t* input, uint32_t offset, size_t len, uint32_t bitSize) { + if(input == nullptr || len < 1 || bitSize < 1) { + return; + } + + if (getClosestAlignedFixedBits(bitSize) == bitSize) { + uint32_t numBytes; + uint32_t endOffSet = static_cast<uint32_t>(offset + len); if (bitSize < 8 ) { - char bitMask = static_cast<char>((1 << bitSize) - 1); - uint32_t numHops = 8 / bitSize; - uint32_t remainder = static_cast<uint32_t>(len % numHops); - uint32_t endUnroll = endOffSet - remainder; - for (uint32_t i = offset; i < endUnroll; i+=numHops) { - char toWrite = 0; - for (uint32_t j = 0; j < numHops; ++j) { - toWrite |= static_cast<char>((input[i+j] & bitMask) << (8 - (j + 1) * bitSize)); - } - writeByte(toWrite); - } - - if (remainder > 0) { - uint32_t startShift = 8 - bitSize; - char toWrite = 0; - for (uint32_t i = endUnroll; i < endOffSet; ++i) { - toWrite |= static_cast<char>((input[i] & bitMask) << startShift); - startShift -= bitSize; - } - writeByte(toWrite); - } - - } else { - numBytes = bitSize / 8; - - for (uint32_t i = offset; i < endOffSet; ++i) { - for (uint32_t j = 0; j < numBytes; ++j) { - char toWrite = static_cast<char>((input[i] >> (8 * (numBytes - j - 1))) & 255); - writeByte(toWrite); - } - } - } - - return; - } - - // write for unaligned bit size - uint32_t bitsLeft = 8; - char current = 0; - for(uint32_t i = offset; i < (offset + len); i++) { - int64_t value = input[i]; - uint32_t bitsToWrite = bitSize; - while (bitsToWrite > bitsLeft) { - // add the bits to the bottom of the current word - current |= static_cast<char>(value >> (bitsToWrite - bitsLeft)); - // subtract out the bits we just added - bitsToWrite -= bitsLeft; - // zero out the bits above bitsToWrite - value &= (static_cast<uint64_t>(1) << bitsToWrite) - 1; - writeByte(current); - current = 0; - bitsLeft = 8; - } - bitsLeft -= bitsToWrite; - current |= static_cast<char>(value << bitsLeft); - if (bitsLeft == 0) { - writeByte(current); - current = 0; - bitsLeft = 8; - } - } - - // flush - if (bitsLeft != 8) { - writeByte(current); - } -} - -void RleEncoderV2::initializeLiterals(int64_t val) { - literals[numLiterals++] = val; - fixedRunLength = 1; - variableRunLength = 1; -} -} + char bitMask = static_cast<char>((1 << bitSize) - 1); + uint32_t numHops = 8 / bitSize; + uint32_t remainder = static_cast<uint32_t>(len % numHops); + uint32_t endUnroll = endOffSet - remainder; + for (uint32_t i = offset; i < endUnroll; i+=numHops) { + char toWrite = 0; + for (uint32_t j = 0; j < numHops; ++j) { + toWrite |= static_cast<char>((input[i+j] & bitMask) << (8 - (j + 1) * bitSize)); + } + writeByte(toWrite); + } + + if (remainder > 0) { + uint32_t startShift = 8 - bitSize; + char toWrite = 0; + for (uint32_t i = endUnroll; i < endOffSet; ++i) { + toWrite |= static_cast<char>((input[i] & bitMask) << startShift); + startShift -= bitSize; + } + writeByte(toWrite); + } + + } else { + numBytes = bitSize / 8; + + for (uint32_t i = offset; i < endOffSet; ++i) { + for (uint32_t j = 0; j < numBytes; ++j) { + char toWrite = static_cast<char>((input[i] >> (8 * (numBytes - j - 1))) & 255); + writeByte(toWrite); + } + } + } + + return; + } + + // write for unaligned bit size + uint32_t bitsLeft = 8; + char current = 0; + for(uint32_t i = offset; i < (offset + len); i++) { + int64_t value = input[i]; + uint32_t bitsToWrite = bitSize; + while (bitsToWrite > bitsLeft) { 
+ // add the bits to the bottom of the current word + current |= static_cast<char>(value >> (bitsToWrite - bitsLeft)); + // subtract out the bits we just added + bitsToWrite -= bitsLeft; + // zero out the bits above bitsToWrite + value &= (static_cast<uint64_t>(1) << bitsToWrite) - 1; + writeByte(current); + current = 0; + bitsLeft = 8; + } + bitsLeft -= bitsToWrite; + current |= static_cast<char>(value << bitsLeft); + if (bitsLeft == 0) { + writeByte(current); + current = 0; + bitsLeft = 8; + } + } + + // flush + if (bitsLeft != 8) { + writeByte(current); + } +} + +void RleEncoderV2::initializeLiterals(int64_t val) { + literals[numLiterals++] = val; + fixedRunLength = 1; + variableRunLength = 1; +} +} diff --git a/contrib/libs/apache/orc/c++/src/Statistics.cc b/contrib/libs/apache/orc/c++/src/Statistics.cc index f13381b5b0..2401f5e0cb 100644 --- a/contrib/libs/apache/orc/c++/src/Statistics.cc +++ b/contrib/libs/apache/orc/c++/src/Statistics.cc @@ -1,408 +1,408 @@ - /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/Exceptions.hh" -#include "RLE.hh" -#include "Statistics.hh" - -#include "wrap/coded-stream-wrapper.h" - -namespace orc { - - ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, - const StatContext& statContext) { - if (s.has_intstatistics()) { - return new IntegerColumnStatisticsImpl(s); - } else if (s.has_doublestatistics()) { - return new DoubleColumnStatisticsImpl(s); - } else if (s.has_stringstatistics()) { - return new StringColumnStatisticsImpl(s, statContext); - } else if (s.has_bucketstatistics()) { - return new BooleanColumnStatisticsImpl(s, statContext); - } else if (s.has_decimalstatistics()) { - return new DecimalColumnStatisticsImpl(s, statContext); - } else if (s.has_timestampstatistics()) { - return new TimestampColumnStatisticsImpl(s, statContext); - } else if (s.has_datestatistics()) { - return new DateColumnStatisticsImpl(s, statContext); - } else if (s.has_binarystatistics()) { - return new BinaryColumnStatisticsImpl(s, statContext); - } else { - return new ColumnStatisticsImpl(s); - } - } - - StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats, - const StatContext& statContext) { - for(int i = 0; i < stripeStats.colstats_size(); i++) { - colStats.push_back( - convertColumnStatistics(stripeStats.colstats(i), statContext)); - } - } - - StatisticsImpl::StatisticsImpl(const proto::Footer& footer, - const StatContext& statContext) { - for(int i = 0; i < footer.statistics_size(); i++) { - colStats.push_back( - convertColumnStatistics(footer.statistics(i), statContext)); - } - } - - StatisticsImpl::~StatisticsImpl() { - for(std::vector<ColumnStatistics*>::iterator ptr = colStats.begin(); - ptr != colStats.end(); - ++ptr) { - delete *ptr; - } - } - - Statistics::~Statistics() { - // PASS - } - - StripeStatistics::~StripeStatistics() { - // PASS - } - - StripeStatisticsImpl::~StripeStatisticsImpl() { - // PASS - } - - StripeStatisticsImpl::StripeStatisticsImpl( - const proto::StripeStatistics& stripeStats, - std::vector<std::vector<proto::ColumnStatistics> >& indexStats, - const StatContext& statContext) { - columnStats.reset(new StatisticsImpl(stripeStats, statContext)); - rowIndexStats.resize(indexStats.size()); - for(size_t i = 0; i < rowIndexStats.size(); i++) { - for(size_t j = 0; j < indexStats[i].size(); j++) { - rowIndexStats[i].push_back( - std::shared_ptr<const ColumnStatistics>( - convertColumnStatistics(indexStats[i][j], statContext))); - } - } - } - - - ColumnStatistics::~ColumnStatistics() { - // PASS - } - - BinaryColumnStatistics::~BinaryColumnStatistics() { - // PASS - } - - BooleanColumnStatistics::~BooleanColumnStatistics() { - // PASS - } - - DateColumnStatistics::~DateColumnStatistics() { - // PASS - } - - DecimalColumnStatistics::~DecimalColumnStatistics() { - // PASS - } - - DoubleColumnStatistics::~DoubleColumnStatistics() { - // PASS - } - - IntegerColumnStatistics::~IntegerColumnStatistics() { - // PASS - } - - StringColumnStatistics::~StringColumnStatistics() { - // PASS - } - - TimestampColumnStatistics::~TimestampColumnStatistics() { - // PASS - } - - MutableColumnStatistics::~MutableColumnStatistics() { - // PASS - } - - ColumnStatisticsImpl::~ColumnStatisticsImpl() { - // PASS - } - - BinaryColumnStatisticsImpl::~BinaryColumnStatisticsImpl() { - // PASS - } - - BooleanColumnStatisticsImpl::~BooleanColumnStatisticsImpl() { - // PASS - } - - DateColumnStatisticsImpl::~DateColumnStatisticsImpl() { - // PASS - } - - DecimalColumnStatisticsImpl::~DecimalColumnStatisticsImpl() { - // 
PASS - } - - DoubleColumnStatisticsImpl::~DoubleColumnStatisticsImpl() { - // PASS - } - - IntegerColumnStatisticsImpl::~IntegerColumnStatisticsImpl() { - // PASS - } - - StringColumnStatisticsImpl::~StringColumnStatisticsImpl() { - // PASS - } - - TimestampColumnStatisticsImpl::~TimestampColumnStatisticsImpl() { - // PASS - } - - ColumnStatisticsImpl::ColumnStatisticsImpl - (const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - } - - BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_binarystatistics() && statContext.correctStats) { - _stats.setHasTotalLength(pb.binarystatistics().has_sum()); - _stats.setTotalLength( - static_cast<uint64_t>(pb.binarystatistics().sum())); - } - } - - BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_bucketstatistics() && statContext.correctStats) { - _hasCount = true; - _trueCount = pb.bucketstatistics().count(0); - } else { - _hasCount = false; - _trueCount = 0; - } - } - - DateColumnStatisticsImpl::DateColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_datestatistics() || !statContext.correctStats) { - // hasMinimum_ is false by default; - // hasMaximum_ is false by default; - _stats.setMinimum(0); - _stats.setMaximum(0); - } else { - _stats.setHasMinimum(pb.datestatistics().has_minimum()); - _stats.setHasMaximum(pb.datestatistics().has_maximum()); - _stats.setMinimum(pb.datestatistics().minimum()); - _stats.setMaximum(pb.datestatistics().maximum()); - } - } - - DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_decimalstatistics() && statContext.correctStats) { - const proto::DecimalStatistics& stats = pb.decimalstatistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); - - _stats.setMinimum(Decimal(stats.minimum())); - _stats.setMaximum(Decimal(stats.maximum())); - _stats.setSum(Decimal(stats.sum())); - } - } - - DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl - (const proto::ColumnStatistics& pb){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_doublestatistics()) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _stats.setSum(0); - }else{ - const proto::DoubleStatistics& stats = pb.doublestatistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); - - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setSum(stats.sum()); - } - } - - IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl - (const proto::ColumnStatistics& pb){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_intstatistics()) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _stats.setSum(0); - }else{ - const proto::IntegerStatistics& stats = pb.intstatistics(); - 
_stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); - - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setSum(stats.sum()); - } - } - - StringColumnStatisticsImpl::StringColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext){ - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_stringstatistics() || !statContext.correctStats) { - _stats.setTotalLength(0); - }else{ - const proto::StringStatistics& stats = pb.stringstatistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasTotalLength(stats.has_sum()); - - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setTotalLength(static_cast<uint64_t>(stats.sum())); - } - } - - TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl - (const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_timestampstatistics() || !statContext.correctStats) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _lowerBound = 0; - _upperBound = 0; - }else{ - const proto::TimestampStatistics& stats = pb.timestampstatistics(); - _stats.setHasMinimum( - stats.has_minimumutc() || - (stats.has_minimum() && (statContext.writerTimezone != nullptr))); - _stats.setHasMaximum( - stats.has_maximumutc() || - (stats.has_maximum() && (statContext.writerTimezone != nullptr))); - _hasLowerBound = stats.has_minimumutc() || stats.has_minimum(); - _hasUpperBound = stats.has_maximumutc() || stats.has_maximum(); - - // Timestamp stats are stored in milliseconds - if (stats.has_minimumutc()) { - int64_t minimum = stats.minimumutc(); - _stats.setMinimum(minimum); - _lowerBound = minimum; - } else if (statContext.writerTimezone) { - int64_t writerTimeSec = stats.minimum() / 1000; - // multiply the offset by 1000 to convert to millisecond - int64_t minimum = - stats.minimum() + - (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) - * 1000; - _stats.setMinimum(minimum); - _lowerBound = minimum; - } else { - _stats.setMinimum(0); - // subtract 1 day 1 hour (25 hours) in milliseconds to handle unknown - // TZ and daylight savings - _lowerBound = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000); - } - - // Timestamp stats are stored in milliseconds - if (stats.has_maximumutc()) { - int64_t maximum = stats.maximumutc(); - _stats.setMaximum(maximum); - _upperBound = maximum; - } else if (statContext.writerTimezone) { - int64_t writerTimeSec = stats.maximum() / 1000; - // multiply the offset by 1000 to convert to millisecond - int64_t maximum = stats.maximum() + - (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) - * 1000; - _stats.setMaximum(maximum); - _upperBound = maximum; - } else { - _stats.setMaximum(0); - // add 1 day 1 hour (25 hours) in milliseconds to handle unknown - // TZ and daylight savings - _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000); - } - // Add 1 millisecond to account for microsecond precision of values - _upperBound += 1; - } - } - - std::unique_ptr<MutableColumnStatistics> createColumnStatistics( - const Type& type) { - switch (static_cast<int64_t>(type.getKind())) { - case BOOLEAN: - return std::unique_ptr<MutableColumnStatistics>( - new BooleanColumnStatisticsImpl()); - case BYTE: - case INT: - case LONG: - case SHORT: - return 
std::unique_ptr<MutableColumnStatistics>( - new IntegerColumnStatisticsImpl()); - case STRUCT: - case MAP: - case LIST: - case UNION: - return std::unique_ptr<MutableColumnStatistics>( - new ColumnStatisticsImpl()); - case FLOAT: - case DOUBLE: - return std::unique_ptr<MutableColumnStatistics>( - new DoubleColumnStatisticsImpl()); - case BINARY: - return std::unique_ptr<MutableColumnStatistics>( - new BinaryColumnStatisticsImpl()); - case STRING: - case CHAR: - case VARCHAR: - return std::unique_ptr<MutableColumnStatistics>( - new StringColumnStatisticsImpl()); - case DATE: - return std::unique_ptr<MutableColumnStatistics>( - new DateColumnStatisticsImpl()); - case TIMESTAMP: - return std::unique_ptr<MutableColumnStatistics>( - new TimestampColumnStatisticsImpl()); - case DECIMAL: - return std::unique_ptr<MutableColumnStatistics>( - new DecimalColumnStatisticsImpl()); - default: - throw NotImplementedYet("Not supported type: " + type.toString()); - } - } - -}// namespace + /** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
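The convertColumnStatistics dispatch above selects a concrete statistics implementation based on which protobuf sub-message is present. On the consuming side, callers typically hold a ColumnStatistics pointer and dynamic_cast to the interface matching the column type. A usage sketch, assuming the public header path orc/Statistics.hh (printIntStats is an illustrative helper, not ORC API):

#include <iostream>
#include "orc/Statistics.hh"

void printIntStats(const orc::ColumnStatistics* stats) {
  // Integer columns expose min/max through IntegerColumnStatistics.
  auto intStats = dynamic_cast<const orc::IntegerColumnStatistics*>(stats);
  if (intStats == nullptr) {
    std::cout << "not an integer column" << std::endl;
    return;
  }
  std::cout << "values: " << intStats->getNumberOfValues()
            << ", has null: " << (intStats->hasNull() ? "yes" : "no") << std::endl;
  if (intStats->hasMinimum() && intStats->hasMaximum()) {
    std::cout << "min: " << intStats->getMinimum()
              << ", max: " << intStats->getMaximum() << std::endl;
  }
}

The hasMinimum/hasMaximum checks matter because, as the constructors above show, min/max are only populated when the corresponding protobuf fields are present and the statistics are considered correct.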
+ */ + +#include "orc/Exceptions.hh" +#include "RLE.hh" +#include "Statistics.hh" + +#include "wrap/coded-stream-wrapper.h" + +namespace orc { + + ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, + const StatContext& statContext) { + if (s.has_intstatistics()) { + return new IntegerColumnStatisticsImpl(s); + } else if (s.has_doublestatistics()) { + return new DoubleColumnStatisticsImpl(s); + } else if (s.has_stringstatistics()) { + return new StringColumnStatisticsImpl(s, statContext); + } else if (s.has_bucketstatistics()) { + return new BooleanColumnStatisticsImpl(s, statContext); + } else if (s.has_decimalstatistics()) { + return new DecimalColumnStatisticsImpl(s, statContext); + } else if (s.has_timestampstatistics()) { + return new TimestampColumnStatisticsImpl(s, statContext); + } else if (s.has_datestatistics()) { + return new DateColumnStatisticsImpl(s, statContext); + } else if (s.has_binarystatistics()) { + return new BinaryColumnStatisticsImpl(s, statContext); + } else { + return new ColumnStatisticsImpl(s); + } + } + + StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats, + const StatContext& statContext) { + for(int i = 0; i < stripeStats.colstats_size(); i++) { + colStats.push_back( + convertColumnStatistics(stripeStats.colstats(i), statContext)); + } + } + + StatisticsImpl::StatisticsImpl(const proto::Footer& footer, + const StatContext& statContext) { + for(int i = 0; i < footer.statistics_size(); i++) { + colStats.push_back( + convertColumnStatistics(footer.statistics(i), statContext)); + } + } + + StatisticsImpl::~StatisticsImpl() { + for(std::vector<ColumnStatistics*>::iterator ptr = colStats.begin(); + ptr != colStats.end(); + ++ptr) { + delete *ptr; + } + } + + Statistics::~Statistics() { + // PASS + } + + StripeStatistics::~StripeStatistics() { + // PASS + } + + StripeStatisticsImpl::~StripeStatisticsImpl() { + // PASS + } + + StripeStatisticsImpl::StripeStatisticsImpl( + const proto::StripeStatistics& stripeStats, + std::vector<std::vector<proto::ColumnStatistics> >& indexStats, + const StatContext& statContext) { + columnStats.reset(new StatisticsImpl(stripeStats, statContext)); + rowIndexStats.resize(indexStats.size()); + for(size_t i = 0; i < rowIndexStats.size(); i++) { + for(size_t j = 0; j < indexStats[i].size(); j++) { + rowIndexStats[i].push_back( + std::shared_ptr<const ColumnStatistics>( + convertColumnStatistics(indexStats[i][j], statContext))); + } + } + } + + + ColumnStatistics::~ColumnStatistics() { + // PASS + } + + BinaryColumnStatistics::~BinaryColumnStatistics() { + // PASS + } + + BooleanColumnStatistics::~BooleanColumnStatistics() { + // PASS + } + + DateColumnStatistics::~DateColumnStatistics() { + // PASS + } + + DecimalColumnStatistics::~DecimalColumnStatistics() { + // PASS + } + + DoubleColumnStatistics::~DoubleColumnStatistics() { + // PASS + } + + IntegerColumnStatistics::~IntegerColumnStatistics() { + // PASS + } + + StringColumnStatistics::~StringColumnStatistics() { + // PASS + } + + TimestampColumnStatistics::~TimestampColumnStatistics() { + // PASS + } + + MutableColumnStatistics::~MutableColumnStatistics() { + // PASS + } + + ColumnStatisticsImpl::~ColumnStatisticsImpl() { + // PASS + } + + BinaryColumnStatisticsImpl::~BinaryColumnStatisticsImpl() { + // PASS + } + + BooleanColumnStatisticsImpl::~BooleanColumnStatisticsImpl() { + // PASS + } + + DateColumnStatisticsImpl::~DateColumnStatisticsImpl() { + // PASS + } + + DecimalColumnStatisticsImpl::~DecimalColumnStatisticsImpl() { + // 
PASS + } + + DoubleColumnStatisticsImpl::~DoubleColumnStatisticsImpl() { + // PASS + } + + IntegerColumnStatisticsImpl::~IntegerColumnStatisticsImpl() { + // PASS + } + + StringColumnStatisticsImpl::~StringColumnStatisticsImpl() { + // PASS + } + + TimestampColumnStatisticsImpl::~TimestampColumnStatisticsImpl() { + // PASS + } + + ColumnStatisticsImpl::ColumnStatisticsImpl + (const proto::ColumnStatistics& pb) { + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + } + + BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (pb.has_binarystatistics() && statContext.correctStats) { + _stats.setHasTotalLength(pb.binarystatistics().has_sum()); + _stats.setTotalLength( + static_cast<uint64_t>(pb.binarystatistics().sum())); + } + } + + BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (pb.has_bucketstatistics() && statContext.correctStats) { + _hasCount = true; + _trueCount = pb.bucketstatistics().count(0); + } else { + _hasCount = false; + _trueCount = 0; + } + } + + DateColumnStatisticsImpl::DateColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (!pb.has_datestatistics() || !statContext.correctStats) { + // hasMinimum_ is false by default; + // hasMaximum_ is false by default; + _stats.setMinimum(0); + _stats.setMaximum(0); + } else { + _stats.setHasMinimum(pb.datestatistics().has_minimum()); + _stats.setHasMaximum(pb.datestatistics().has_maximum()); + _stats.setMinimum(pb.datestatistics().minimum()); + _stats.setMaximum(pb.datestatistics().maximum()); + } + } + + DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (pb.has_decimalstatistics() && statContext.correctStats) { + const proto::DecimalStatistics& stats = pb.decimalstatistics(); + _stats.setHasMinimum(stats.has_minimum()); + _stats.setHasMaximum(stats.has_maximum()); + _stats.setHasSum(stats.has_sum()); + + _stats.setMinimum(Decimal(stats.minimum())); + _stats.setMaximum(Decimal(stats.maximum())); + _stats.setSum(Decimal(stats.sum())); + } + } + + DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl + (const proto::ColumnStatistics& pb){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (!pb.has_doublestatistics()) { + _stats.setMinimum(0); + _stats.setMaximum(0); + _stats.setSum(0); + }else{ + const proto::DoubleStatistics& stats = pb.doublestatistics(); + _stats.setHasMinimum(stats.has_minimum()); + _stats.setHasMaximum(stats.has_maximum()); + _stats.setHasSum(stats.has_sum()); + + _stats.setMinimum(stats.minimum()); + _stats.setMaximum(stats.maximum()); + _stats.setSum(stats.sum()); + } + } + + IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl + (const proto::ColumnStatistics& pb){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (!pb.has_intstatistics()) { + _stats.setMinimum(0); + _stats.setMaximum(0); + _stats.setSum(0); + }else{ + const proto::IntegerStatistics& stats = pb.intstatistics(); + 
_stats.setHasMinimum(stats.has_minimum()); + _stats.setHasMaximum(stats.has_maximum()); + _stats.setHasSum(stats.has_sum()); + + _stats.setMinimum(stats.minimum()); + _stats.setMaximum(stats.maximum()); + _stats.setSum(stats.sum()); + } + } + + StringColumnStatisticsImpl::StringColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext){ + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (!pb.has_stringstatistics() || !statContext.correctStats) { + _stats.setTotalLength(0); + }else{ + const proto::StringStatistics& stats = pb.stringstatistics(); + _stats.setHasMinimum(stats.has_minimum()); + _stats.setHasMaximum(stats.has_maximum()); + _stats.setHasTotalLength(stats.has_sum()); + + _stats.setMinimum(stats.minimum()); + _stats.setMaximum(stats.maximum()); + _stats.setTotalLength(static_cast<uint64_t>(stats.sum())); + } + } + + TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl + (const proto::ColumnStatistics& pb, const StatContext& statContext) { + _stats.setNumberOfValues(pb.numberofvalues()); + _stats.setHasNull(pb.hasnull()); + if (!pb.has_timestampstatistics() || !statContext.correctStats) { + _stats.setMinimum(0); + _stats.setMaximum(0); + _lowerBound = 0; + _upperBound = 0; + }else{ + const proto::TimestampStatistics& stats = pb.timestampstatistics(); + _stats.setHasMinimum( + stats.has_minimumutc() || + (stats.has_minimum() && (statContext.writerTimezone != nullptr))); + _stats.setHasMaximum( + stats.has_maximumutc() || + (stats.has_maximum() && (statContext.writerTimezone != nullptr))); + _hasLowerBound = stats.has_minimumutc() || stats.has_minimum(); + _hasUpperBound = stats.has_maximumutc() || stats.has_maximum(); + + // Timestamp stats are stored in milliseconds + if (stats.has_minimumutc()) { + int64_t minimum = stats.minimumutc(); + _stats.setMinimum(minimum); + _lowerBound = minimum; + } else if (statContext.writerTimezone) { + int64_t writerTimeSec = stats.minimum() / 1000; + // multiply the offset by 1000 to convert to millisecond + int64_t minimum = + stats.minimum() + + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) + * 1000; + _stats.setMinimum(minimum); + _lowerBound = minimum; + } else { + _stats.setMinimum(0); + // subtract 1 day 1 hour (25 hours) in milliseconds to handle unknown + // TZ and daylight savings + _lowerBound = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000); + } + + // Timestamp stats are stored in milliseconds + if (stats.has_maximumutc()) { + int64_t maximum = stats.maximumutc(); + _stats.setMaximum(maximum); + _upperBound = maximum; + } else if (statContext.writerTimezone) { + int64_t writerTimeSec = stats.maximum() / 1000; + // multiply the offset by 1000 to convert to millisecond + int64_t maximum = stats.maximum() + + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) + * 1000; + _stats.setMaximum(maximum); + _upperBound = maximum; + } else { + _stats.setMaximum(0); + // add 1 day 1 hour (25 hours) in milliseconds to handle unknown + // TZ and daylight savings + _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000); + } + // Add 1 millisecond to account for microsecond precision of values + _upperBound += 1; + } + } + + std::unique_ptr<MutableColumnStatistics> createColumnStatistics( + const Type& type) { + switch (static_cast<int64_t>(type.getKind())) { + case BOOLEAN: + return std::unique_ptr<MutableColumnStatistics>( + new BooleanColumnStatisticsImpl()); + case BYTE: + case INT: + case LONG: + case SHORT: + return 
std::unique_ptr<MutableColumnStatistics>( + new IntegerColumnStatisticsImpl()); + case STRUCT: + case MAP: + case LIST: + case UNION: + return std::unique_ptr<MutableColumnStatistics>( + new ColumnStatisticsImpl()); + case FLOAT: + case DOUBLE: + return std::unique_ptr<MutableColumnStatistics>( + new DoubleColumnStatisticsImpl()); + case BINARY: + return std::unique_ptr<MutableColumnStatistics>( + new BinaryColumnStatisticsImpl()); + case STRING: + case CHAR: + case VARCHAR: + return std::unique_ptr<MutableColumnStatistics>( + new StringColumnStatisticsImpl()); + case DATE: + return std::unique_ptr<MutableColumnStatistics>( + new DateColumnStatisticsImpl()); + case TIMESTAMP: + return std::unique_ptr<MutableColumnStatistics>( + new TimestampColumnStatisticsImpl()); + case DECIMAL: + return std::unique_ptr<MutableColumnStatistics>( + new DecimalColumnStatisticsImpl()); + default: + throw NotImplementedYet("Not supported type: " + type.toString()); + } + } + +}// namespace diff --git a/contrib/libs/apache/orc/c++/src/Statistics.hh b/contrib/libs/apache/orc/c++/src/Statistics.hh index 849019d8d7..ee9db23f86 100644 --- a/contrib/libs/apache/orc/c++/src/Statistics.hh +++ b/contrib/libs/apache/orc/c++/src/Statistics.hh @@ -1,971 +1,971 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
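The TimestampColumnStatisticsImpl constructor above derives millisecond lower/upper bounds in three tiers: prefer the UTC minimum/maximum when present, otherwise shift the writer-local value by the writer timezone's GMT offset, otherwise widen by 25 hours to cover an unknown timezone plus daylight saving; one extra millisecond is then added to the upper bound to cover sub-millisecond precision. A small sketch of the lower-bound rule (lowerBoundMillis and its parameters are illustrative, not ORC API):

#include <cstdint>

// Millisecond lower bound for timestamp statistics, mirroring the three cases above.
int64_t lowerBoundMillis(bool hasMinimumUtc, int64_t minimumUtcMillis,
                         bool hasWriterTimezone, int64_t gmtOffsetSeconds,
                         int64_t minimumWriterLocalMillis) {
  const int64_t kSecondsPerHour = 3600;
  if (hasMinimumUtc) {
    return minimumUtcMillis;                                      // already in UTC
  }
  if (hasWriterTimezone) {
    return minimumWriterLocalMillis + gmtOffsetSeconds * 1000;    // offset seconds -> millis
  }
  return minimumWriterLocalMillis - 25 * kSecondsPerHour * 1000;  // unknown TZ / DST guard band
}

The upper bound is symmetric (the 25-hour guard is added instead of subtracted), with the final +1 ms applied on top.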
- */ - -#ifndef ORC_STATISTICS_IMPL_HH -#define ORC_STATISTICS_IMPL_HH - -#include "orc/Common.hh" -#include "orc/Int128.hh" -#include "orc/OrcFile.hh" -#include "orc/Reader.hh" - -#include "Timezone.hh" -#include "TypeImpl.hh" - -namespace orc { - -/** - * StatContext contains fields required to compute statistics - */ - - struct StatContext { - const bool correctStats; - const Timezone* const writerTimezone; - StatContext() : correctStats(false), writerTimezone(nullptr) {} - StatContext(bool cStat, const Timezone* const timezone = nullptr) : - correctStats(cStat), writerTimezone(timezone) {} - }; - -/** - * Internal Statistics Implementation - */ - - template <typename T> - class InternalStatisticsImpl { - private: - bool _hasNull; - bool _hasMinimum; - bool _hasMaximum; - bool _hasSum; - bool _hasTotalLength; - uint64_t _totalLength; - uint64_t _valueCount; - T _minimum; - T _maximum; - T _sum; - public: - InternalStatisticsImpl() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount = 0; - } - - ~InternalStatisticsImpl() {} - - // GET / SET _totalLength - bool hasTotalLength() const { return _hasTotalLength; } - - void setHasTotalLength(bool hasTotalLength) { - _hasTotalLength = hasTotalLength; - } - - uint64_t getTotalLength() const { return _totalLength; } - - void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; } - - // GET / SET _sum - bool hasSum() const { return _hasSum; } - - void setHasSum(bool hasSum) { _hasSum = hasSum; } - - T getSum() const { return _sum; } - - void setSum(T sum) { _sum = sum; } - - // GET / SET _maximum - bool hasMaximum() const { return _hasMaximum; } - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ORC_STATISTICS_IMPL_HH +#define ORC_STATISTICS_IMPL_HH + +#include "orc/Common.hh" +#include "orc/Int128.hh" +#include "orc/OrcFile.hh" +#include "orc/Reader.hh" + +#include "Timezone.hh" +#include "TypeImpl.hh" + +namespace orc { + +/** + * StatContext contains fields required to compute statistics + */ + + struct StatContext { + const bool correctStats; + const Timezone* const writerTimezone; + StatContext() : correctStats(false), writerTimezone(nullptr) {} + StatContext(bool cStat, const Timezone* const timezone = nullptr) : + correctStats(cStat), writerTimezone(timezone) {} + }; + +/** + * Internal Statistics Implementation + */ + + template <typename T> + class InternalStatisticsImpl { + private: + bool _hasNull; + bool _hasMinimum; + bool _hasMaximum; + bool _hasSum; + bool _hasTotalLength; + uint64_t _totalLength; + uint64_t _valueCount; + T _minimum; + T _maximum; + T _sum; + public: + InternalStatisticsImpl() { + _hasNull = false; + _hasMinimum = false; + _hasMaximum = false; + _hasSum = false; + _hasTotalLength = false; + _totalLength = 0; + _valueCount = 0; + } + + ~InternalStatisticsImpl() {} + + // GET / SET _totalLength + bool hasTotalLength() const { return _hasTotalLength; } + + void setHasTotalLength(bool hasTotalLength) { + _hasTotalLength = hasTotalLength; + } + + uint64_t getTotalLength() const { return _totalLength; } + + void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; } + + // GET / SET _sum + bool hasSum() const { return _hasSum; } + + void setHasSum(bool hasSum) { _hasSum = hasSum; } + + T getSum() const { return _sum; } + + void setSum(T sum) { _sum = sum; } + + // GET / SET _maximum + bool hasMaximum() const { return _hasMaximum; } + const T & getMaximum() const { return _maximum; } - - void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; } - - void setMaximum(T max) { _maximum = max; } - - // GET / SET _minimum - bool hasMinimum() const { return _hasMinimum; } - - void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; } - + + void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; } + + void setMaximum(T max) { _maximum = max; } + + // GET / SET _minimum + bool hasMinimum() const { return _hasMinimum; } + + void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; } + const T & getMinimum() const { return _minimum; } - - void setMinimum(T min) { _minimum = min; } - - // GET / SET _valueCount - uint64_t getNumberOfValues() const { return _valueCount; } - - void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; } - - // GET / SET _hasNullValue - bool hasNull() const { return _hasNull; } - - void setHasNull(bool hasNull) { _hasNull = hasNull; } - - void reset() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount = 0; - } - - void updateMinMax(T value) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = _maximum = value; - } else if (compare(value, _minimum)) { - _minimum = value; - } else if (compare(_maximum, value)) { - _maximum = value; - } - } - - // sum is not merged here as we need to check overflow - void merge(const InternalStatisticsImpl& other) { - _hasNull = _hasNull || other._hasNull; - _valueCount += other._valueCount; - - if (other._hasMinimum) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = other._minimum; - _maximum = other._maximum; - } else { - // all template types should support operator< - if (compare(_maximum, other._maximum)) { - _maximum = 
other._maximum; - } - if (compare(other._minimum, _minimum)) { - _minimum = other._minimum; - } - } - } - - _hasTotalLength = _hasTotalLength && other._hasTotalLength; - _totalLength += other._totalLength; - } - }; - - typedef InternalStatisticsImpl<char> InternalCharStatistics; - typedef InternalStatisticsImpl<char> InternalBooleanStatistics; - typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics; - typedef InternalStatisticsImpl<int32_t> InternalDateStatistics; - typedef InternalStatisticsImpl<double> InternalDoubleStatistics; - typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics; - typedef InternalStatisticsImpl<std::string> InternalStringStatistics; - - /** - * Mutable column statistics for use by the writer. - */ - class MutableColumnStatistics { - public: - virtual ~MutableColumnStatistics(); - - virtual void increase(uint64_t count) = 0; - - virtual void setNumberOfValues(uint64_t value) = 0; - - virtual void setHasNull(bool hasNull) = 0; - - virtual void merge(const MutableColumnStatistics& other) = 0; - - virtual void reset() = 0; - - virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0; - }; - -/** - * ColumnStatistics Implementation - */ - - class ColumnStatisticsImpl: public ColumnStatistics, - public MutableColumnStatistics { - private: - InternalCharStatistics _stats; - public: - ColumnStatisticsImpl() { reset(); } - ColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~ColumnStatisticsImpl() override; - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - void merge(const MutableColumnStatistics& other) override { - _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Column has " << getNumberOfValues() << " values" - << " and has null value: " << (hasNull() ? 
"yes" : "no") - << std::endl; - return buffer.str(); - } - }; - - class BinaryColumnStatisticsImpl: public BinaryColumnStatistics, - public MutableColumnStatistics { - private: - InternalCharStatistics _stats; - public: - BinaryColumnStatisticsImpl() { reset(); } - BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~BinaryColumnStatisticsImpl() override; - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - bool hasTotalLength() const override { - return _stats.hasTotalLength(); - } - - uint64_t getTotalLength() const override { - if(hasTotalLength()){ - return _stats.getTotalLength(); - }else{ - throw ParseError("Total length is not defined."); - } - } - - void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); - } - - void update(size_t length) { - _stats.setTotalLength(_stats.getTotalLength() + length); - } - - void merge(const MutableColumnStatistics& other) override { - const BinaryColumnStatisticsImpl& binStats = - dynamic_cast<const BinaryColumnStatisticsImpl&>(other); - _stats.merge(binStats._stats); - } - - void reset() override { - _stats.reset(); - setTotalLength(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); - binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Binary" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; - if(hasTotalLength()){ - buffer << "Total length: " << getTotalLength() << std::endl; - }else{ - buffer << "Total length: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class BooleanColumnStatisticsImpl: public BooleanColumnStatistics, - public MutableColumnStatistics { - private: - InternalBooleanStatistics _stats; - bool _hasCount; - uint64_t _trueCount; - - public: - BooleanColumnStatisticsImpl() { reset(); } - BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~BooleanColumnStatisticsImpl() override; - - bool hasCount() const override { - return _hasCount; - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - _hasCount = true; - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - uint64_t getFalseCount() const override { - if(hasCount()){ - return getNumberOfValues() - _trueCount; - }else{ - throw ParseError("False count is not defined."); - } - } - - uint64_t getTrueCount() const override { - if(hasCount()){ - return _trueCount; - }else{ - throw ParseError("True count is not defined."); - } - } - - void setTrueCount(uint64_t trueCount) { - _hasCount = true; - _trueCount = trueCount; - } - - void update(bool value, size_t repetitions) { - if (value) { - _trueCount += repetitions; - } - } - - void merge(const MutableColumnStatistics& other) override { - const BooleanColumnStatisticsImpl& boolStats = - dynamic_cast<const BooleanColumnStatisticsImpl&>(other); - _stats.merge(boolStats._stats); - _hasCount = _hasCount && boolStats._hasCount; - _trueCount += boolStats._trueCount; - } - - void reset() override { - _stats.reset(); - setTrueCount(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); - if (_hasCount) { - bucketStats->add_count(_trueCount); - } else { - bucketStats->clear_count(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Boolean" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; - if(hasCount()){ - buffer << "(true: " << getTrueCount() << "; false: " - << getFalseCount() << ")" << std::endl; - } else { - buffer << "(true: not defined; false: not defined)" << std::endl; - buffer << "True and false counts are not defined" << std::endl; - } - return buffer.str(); - } - }; - - class DateColumnStatisticsImpl: public DateColumnStatistics, - public MutableColumnStatistics{ - private: - InternalDateStatistics _stats; - public: - DateColumnStatisticsImpl() { reset(); } - DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~DateColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int32_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int32_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int32_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int32_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - void update(int32_t value) { - _stats.updateMinMax(value); - } - - void merge(const MutableColumnStatistics& other) override { - const DateColumnStatisticsImpl& dateStats = - dynamic_cast<const DateColumnStatisticsImpl&>(other); - _stats.merge(dateStats._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DateStatistics* dateStatistics = - pbStats.mutable_datestatistics(); - if (_stats.hasMinimum()) { - dateStatistics->set_maximum(_stats.getMaximum()); - dateStatistics->set_minimum(_stats.getMinimum()); - } else { - dateStatistics->clear_minimum(); - dateStatistics->clear_maximum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Date" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class DecimalColumnStatisticsImpl: public DecimalColumnStatistics, - public MutableColumnStatistics { - private: - InternalDecimalStatistics _stats; - - public: - DecimalColumnStatisticsImpl() { reset(); } - DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~DecimalColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - Decimal getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - Decimal getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(Decimal minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(Decimal maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - Decimal getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(Decimal sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - - void update(const Decimal& value) { - _stats.updateMinMax(value); - - if (_stats.hasSum()) { - updateSum(value); - } - } - - void merge(const MutableColumnStatistics& other) override { - const DecimalColumnStatisticsImpl& decStats = - dynamic_cast<const DecimalColumnStatisticsImpl&>(other); - - _stats.merge(decStats._stats); - - _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); - if (_stats.hasSum()) { - updateSum(decStats.getSum()); - } - } - - void reset() override { - _stats.reset(); - setSum(Decimal()); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); - if (_stats.hasMinimum()) { - decStats->set_minimum(TString(_stats.getMinimum().toString())); - decStats->set_maximum(TString(_stats.getMaximum().toString())); - } else { - decStats->clear_minimum(); - decStats->clear_maximum(); - } - if (_stats.hasSum()) { - decStats->set_sum(TString(_stats.getSum().toString())); - } else { - decStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Decimal" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum().toString() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum().toString() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum().toString() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - - return buffer.str(); - } - - private: - void updateSum(Decimal value) { - if (_stats.hasSum()) { - bool overflow = false; - Decimal sum = _stats.getSum(); - if (sum.scale > value.scale) { - value.value = scaleUpInt128ByPowerOfTen(value.value, - sum.scale - value.scale, - overflow); - } else if (sum.scale < value.scale) { - sum.value = scaleUpInt128ByPowerOfTen(sum.value, - value.scale - sum.scale, - overflow); - sum.scale = value.scale; - } - - if (!overflow) { - bool wasPositive = sum.value >= 0; - sum.value += value.value; - if ((value.value >= 0) == wasPositive) { - _stats.setHasSum((sum.value >= 0) == wasPositive); - } - } else { - _stats.setHasSum(false); - } - - if (_stats.hasSum()) { - _stats.setSum(sum); - } - } - } - }; - - class DoubleColumnStatisticsImpl: public DoubleColumnStatistics, - public MutableColumnStatistics { - private: - InternalDoubleStatistics _stats; - public: - DoubleColumnStatisticsImpl() { reset(); } - DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~DoubleColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - double getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - double getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(double minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(double maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - double getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(double sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - - void update(double value) { - _stats.updateMinMax(value); - _stats.setSum(_stats.getSum() + value); - } - - void merge(const MutableColumnStatistics& other) override { - const DoubleColumnStatisticsImpl& doubleStats = - dynamic_cast<const DoubleColumnStatisticsImpl&>(other); - _stats.merge(doubleStats._stats); - - _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); - if (_stats.hasSum()) { - _stats.setSum(_stats.getSum() + doubleStats.getSum()); - } - } - - void reset() override { - _stats.reset(); - setSum(0.0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - 
pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); - if (_stats.hasMinimum()) { - doubleStats->set_minimum(_stats.getMinimum()); - doubleStats->set_maximum(_stats.getMaximum()); - } else { - doubleStats->clear_minimum(); - doubleStats->clear_maximum(); - } - if (_stats.hasSum()) { - doubleStats->set_sum(_stats.getSum()); - } else { - doubleStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Double" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class IntegerColumnStatisticsImpl: public IntegerColumnStatistics, - public MutableColumnStatistics { - private: - InternalIntegerStatistics _stats; - public: - IntegerColumnStatisticsImpl() { reset(); } - IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~IntegerColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int64_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int64_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - int64_t getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(int64_t sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - + + void setMinimum(T min) { _minimum = min; } + + // GET / SET _valueCount + uint64_t getNumberOfValues() const { return _valueCount; } + + void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; } + + // GET / SET _hasNullValue + bool hasNull() const { return _hasNull; } + + void setHasNull(bool hasNull) { _hasNull = hasNull; } + + void reset() { + _hasNull = false; + _hasMinimum = false; + _hasMaximum = false; + _hasSum = false; + _hasTotalLength = false; + _totalLength = 0; + _valueCount = 0; + } + + void updateMinMax(T value) { + if (!_hasMinimum) { + _hasMinimum = _hasMaximum = true; + _minimum = _maximum = value; + } else if (compare(value, 
_minimum)) { + _minimum = value; + } else if (compare(_maximum, value)) { + _maximum = value; + } + } + + // sum is not merged here as we need to check overflow + void merge(const InternalStatisticsImpl& other) { + _hasNull = _hasNull || other._hasNull; + _valueCount += other._valueCount; + + if (other._hasMinimum) { + if (!_hasMinimum) { + _hasMinimum = _hasMaximum = true; + _minimum = other._minimum; + _maximum = other._maximum; + } else { + // all template types should support operator< + if (compare(_maximum, other._maximum)) { + _maximum = other._maximum; + } + if (compare(other._minimum, _minimum)) { + _minimum = other._minimum; + } + } + } + + _hasTotalLength = _hasTotalLength && other._hasTotalLength; + _totalLength += other._totalLength; + } + }; + + typedef InternalStatisticsImpl<char> InternalCharStatistics; + typedef InternalStatisticsImpl<char> InternalBooleanStatistics; + typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics; + typedef InternalStatisticsImpl<int32_t> InternalDateStatistics; + typedef InternalStatisticsImpl<double> InternalDoubleStatistics; + typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics; + typedef InternalStatisticsImpl<std::string> InternalStringStatistics; + + /** + * Mutable column statistics for use by the writer. + */ + class MutableColumnStatistics { + public: + virtual ~MutableColumnStatistics(); + + virtual void increase(uint64_t count) = 0; + + virtual void setNumberOfValues(uint64_t value) = 0; + + virtual void setHasNull(bool hasNull) = 0; + + virtual void merge(const MutableColumnStatistics& other) = 0; + + virtual void reset() = 0; + + virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0; + }; + +/** + * ColumnStatistics Implementation + */ + + class ColumnStatisticsImpl: public ColumnStatistics, + public MutableColumnStatistics { + private: + InternalCharStatistics _stats; + public: + ColumnStatisticsImpl() { reset(); } + ColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~ColumnStatisticsImpl() override; + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + void merge(const MutableColumnStatistics& other) override { + _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Column has " << getNumberOfValues() << " values" + << " and has null value: " << (hasNull() ? 
"yes" : "no") + << std::endl; + return buffer.str(); + } + }; + + class BinaryColumnStatisticsImpl: public BinaryColumnStatistics, + public MutableColumnStatistics { + private: + InternalCharStatistics _stats; + public: + BinaryColumnStatisticsImpl() { reset(); } + BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~BinaryColumnStatisticsImpl() override; + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + bool hasTotalLength() const override { + return _stats.hasTotalLength(); + } + + uint64_t getTotalLength() const override { + if(hasTotalLength()){ + return _stats.getTotalLength(); + }else{ + throw ParseError("Total length is not defined."); + } + } + + void setTotalLength(uint64_t length) { + _stats.setHasTotalLength(true); + _stats.setTotalLength(length); + } + + void update(size_t length) { + _stats.setTotalLength(_stats.getTotalLength() + length); + } + + void merge(const MutableColumnStatistics& other) override { + const BinaryColumnStatisticsImpl& binStats = + dynamic_cast<const BinaryColumnStatisticsImpl&>(other); + _stats.merge(binStats._stats); + } + + void reset() override { + _stats.reset(); + setTotalLength(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); + binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Binary" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; + if(hasTotalLength()){ + buffer << "Total length: " << getTotalLength() << std::endl; + }else{ + buffer << "Total length: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class BooleanColumnStatisticsImpl: public BooleanColumnStatistics, + public MutableColumnStatistics { + private: + InternalBooleanStatistics _stats; + bool _hasCount; + uint64_t _trueCount; + + public: + BooleanColumnStatisticsImpl() { reset(); } + BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~BooleanColumnStatisticsImpl() override; + + bool hasCount() const override { + return _hasCount; + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + _hasCount = true; + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + uint64_t getFalseCount() const override { + if(hasCount()){ + return getNumberOfValues() - _trueCount; + }else{ + throw ParseError("False count is not defined."); + } + } + + uint64_t getTrueCount() const override { + if(hasCount()){ + return _trueCount; + }else{ + throw ParseError("True count is not defined."); + } + } + + void setTrueCount(uint64_t trueCount) { + _hasCount = true; + _trueCount = trueCount; + } + + void update(bool value, size_t repetitions) { + if (value) { + _trueCount += repetitions; + } + } + + void merge(const MutableColumnStatistics& other) override { + const BooleanColumnStatisticsImpl& boolStats = + dynamic_cast<const BooleanColumnStatisticsImpl&>(other); + _stats.merge(boolStats._stats); + _hasCount = _hasCount && boolStats._hasCount; + _trueCount += boolStats._trueCount; + } + + void reset() override { + _stats.reset(); + setTrueCount(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); + if (_hasCount) { + bucketStats->add_count(_trueCount); + } else { + bucketStats->clear_count(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Boolean" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; + if(hasCount()){ + buffer << "(true: " << getTrueCount() << "; false: " + << getFalseCount() << ")" << std::endl; + } else { + buffer << "(true: not defined; false: not defined)" << std::endl; + buffer << "True and false counts are not defined" << std::endl; + } + return buffer.str(); + } + }; + + class DateColumnStatisticsImpl: public DateColumnStatistics, + public MutableColumnStatistics{ + private: + InternalDateStatistics _stats; + public: + DateColumnStatisticsImpl() { reset(); } + DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~DateColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int32_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int32_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int32_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int32_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + void update(int32_t value) { + _stats.updateMinMax(value); + } + + void merge(const MutableColumnStatistics& other) override { + const DateColumnStatisticsImpl& dateStats = + dynamic_cast<const DateColumnStatisticsImpl&>(other); + _stats.merge(dateStats._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DateStatistics* dateStatistics = + pbStats.mutable_datestatistics(); + if (_stats.hasMinimum()) { + dateStatistics->set_maximum(_stats.getMaximum()); + dateStatistics->set_minimum(_stats.getMinimum()); + } else { + dateStatistics->clear_minimum(); + dateStatistics->clear_maximum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Date" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class DecimalColumnStatisticsImpl: public DecimalColumnStatistics, + public MutableColumnStatistics { + private: + InternalDecimalStatistics _stats; + + public: + DecimalColumnStatisticsImpl() { reset(); } + DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~DecimalColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + Decimal getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + Decimal getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(Decimal minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(Decimal maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + Decimal getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(Decimal sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + + void update(const Decimal& value) { + _stats.updateMinMax(value); + + if (_stats.hasSum()) { + updateSum(value); + } + } + + void merge(const MutableColumnStatistics& other) override { + const DecimalColumnStatisticsImpl& decStats = + dynamic_cast<const DecimalColumnStatisticsImpl&>(other); + + _stats.merge(decStats._stats); + + _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); + if (_stats.hasSum()) { + updateSum(decStats.getSum()); + } + } + + void reset() override { + _stats.reset(); + setSum(Decimal()); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); + if (_stats.hasMinimum()) { + decStats->set_minimum(TString(_stats.getMinimum().toString())); + decStats->set_maximum(TString(_stats.getMaximum().toString())); + } else { + decStats->clear_minimum(); + decStats->clear_maximum(); + } + if (_stats.hasSum()) { + decStats->set_sum(TString(_stats.getSum().toString())); + } else { + decStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Decimal" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum().toString() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum().toString() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum().toString() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + + return buffer.str(); + } + + private: + void updateSum(Decimal value) { + if (_stats.hasSum()) { + bool overflow = false; + Decimal sum = _stats.getSum(); + if (sum.scale > value.scale) { + value.value = scaleUpInt128ByPowerOfTen(value.value, + sum.scale - value.scale, + overflow); + } else if (sum.scale < value.scale) { + sum.value = scaleUpInt128ByPowerOfTen(sum.value, + value.scale - sum.scale, + overflow); + sum.scale = value.scale; + } + + if (!overflow) { + bool wasPositive = sum.value >= 0; + sum.value += value.value; + if ((value.value >= 0) == wasPositive) { + _stats.setHasSum((sum.value >= 0) == wasPositive); + } + } else { + _stats.setHasSum(false); + } + + if (_stats.hasSum()) { + _stats.setSum(sum); + } + } + } + }; + + class DoubleColumnStatisticsImpl: public DoubleColumnStatistics, + public MutableColumnStatistics { + private: + InternalDoubleStatistics _stats; + public: + DoubleColumnStatisticsImpl() { reset(); } + DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~DoubleColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + double getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + double getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(double minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(double maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + double getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(double sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + + void update(double value) { + _stats.updateMinMax(value); + _stats.setSum(_stats.getSum() + value); + } + + void merge(const MutableColumnStatistics& other) override { + const DoubleColumnStatisticsImpl& doubleStats = + dynamic_cast<const DoubleColumnStatisticsImpl&>(other); + _stats.merge(doubleStats._stats); + + _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); + if (_stats.hasSum()) { + _stats.setSum(_stats.getSum() + doubleStats.getSum()); + } + } + + void reset() override { + _stats.reset(); + setSum(0.0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + 
pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); + if (_stats.hasMinimum()) { + doubleStats->set_minimum(_stats.getMinimum()); + doubleStats->set_maximum(_stats.getMaximum()); + } else { + doubleStats->clear_minimum(); + doubleStats->clear_maximum(); + } + if (_stats.hasSum()) { + doubleStats->set_sum(_stats.getSum()); + } else { + doubleStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Double" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class IntegerColumnStatisticsImpl: public IntegerColumnStatistics, + public MutableColumnStatistics { + private: + InternalIntegerStatistics _stats; + public: + IntegerColumnStatisticsImpl() { reset(); } + IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~IntegerColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int64_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int64_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int64_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int64_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + int64_t getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(int64_t sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + void update(int64_t value, int repetitions) { _stats.updateMinMax(value); - + if (_stats.hasSum()) { if (repetitions > 1) { _stats.setHasSum(multiplyExact(value, repetitions, &value)); @@ -981,498 +981,498 @@ namespace orc { } } - void merge(const MutableColumnStatistics& other) override { - const IntegerColumnStatisticsImpl& intStats = - dynamic_cast<const IntegerColumnStatisticsImpl&>(other); - - _stats.merge(intStats._stats); - - // update sum and check overflow - _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); - if (_stats.hasSum()) { + void merge(const MutableColumnStatistics& other) override { + const IntegerColumnStatisticsImpl& intStats = + 
dynamic_cast<const IntegerColumnStatisticsImpl&>(other); + + _stats.merge(intStats._stats); + + // update sum and check overflow + _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); + if (_stats.hasSum()) { int64_t value; _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value)); if (_stats.hasSum()) { _stats.setSum(value); - } - } - } - - void reset() override { - _stats.reset(); - setSum(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); - if (_stats.hasMinimum()) { - intStats->set_minimum(_stats.getMinimum()); - intStats->set_maximum(_stats.getMaximum()); - } else { - intStats->clear_minimum(); - intStats->clear_maximum(); - } - if (_stats.hasSum()) { - intStats->set_sum(_stats.getSum()); - } else { - intStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Integer" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class StringColumnStatisticsImpl: public StringColumnStatistics, - public MutableColumnStatistics{ - private: - InternalStringStatistics _stats; - - public: - StringColumnStatisticsImpl() { - reset(); - } - StringColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~StringColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasTotalLength() const override { - return _stats.hasTotalLength(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - + } + } + } + + void reset() override { + _stats.reset(); + setSum(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); + if (_stats.hasMinimum()) { + intStats->set_minimum(_stats.getMinimum()); + intStats->set_maximum(_stats.getMaximum()); + } else { + intStats->clear_minimum(); + intStats->clear_maximum(); + } + if (_stats.hasSum()) { + intStats->set_sum(_stats.getSum()); + } else { + intStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Integer" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? 
"yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class StringColumnStatisticsImpl: public StringColumnStatistics, + public MutableColumnStatistics{ + private: + InternalStringStatistics _stats; + + public: + StringColumnStatisticsImpl() { + reset(); + } + StringColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~StringColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasTotalLength() const override { + return _stats.hasTotalLength(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + const std::string & getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + const std::string & getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(std::string minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(std::string maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - uint64_t getTotalLength() const override { - if(hasTotalLength()){ - return _stats.getTotalLength(); - }else{ - throw ParseError("Total length is not defined."); - } - } - - void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); - } - - void update(const char* value, size_t length) { - if (value != nullptr) { - if (!_stats.hasMinimum()) { - std::string tempStr(value, value + length); - setMinimum(tempStr); - setMaximum(tempStr); - } else { - // update min - int minCmp = strncmp(_stats.getMinimum().c_str(), - value, - std::min(_stats.getMinimum().length(), length)); - if (minCmp > 0 || - (minCmp == 0 && length < _stats.getMinimum().length())) { - setMinimum(std::string(value, value + length)); - } - - // update max - int maxCmp = strncmp(_stats.getMaximum().c_str(), - value, - std::min(_stats.getMaximum().length(), length)); - if (maxCmp < 0 || - (maxCmp == 0 && length > _stats.getMaximum().length())) { - setMaximum(std::string(value, value + length)); - } - } - } - - _stats.setTotalLength(_stats.getTotalLength() + length); - } - - void update(std::string value) { - update(value.c_str(), value.length()); - } - - void merge(const MutableColumnStatistics& other) override { - const StringColumnStatisticsImpl& strStats = - dynamic_cast<const StringColumnStatisticsImpl&>(other); - _stats.merge(strStats._stats); - } - - void reset() 
override { - _stats.reset(); - setTotalLength(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); - if (_stats.hasMinimum()) { - strStats->set_minimum(TString(_stats.getMinimum())); - strStats->set_maximum(TString(_stats.getMaximum())); - } else { - strStats->clear_minimum(); - strStats->clear_maximum(); - } - if (_stats.hasTotalLength()) { - strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); - } else { - strStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: String" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum is not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum is not defined" << std::endl; - } - - if(hasTotalLength()){ - buffer << "Total length: " << getTotalLength() << std::endl; - }else{ - buffer << "Total length is not defined" << std::endl; - } - return buffer.str(); - } - }; - - class TimestampColumnStatisticsImpl: public TimestampColumnStatistics, - public MutableColumnStatistics { - private: - InternalIntegerStatistics _stats; - bool _hasLowerBound; - bool _hasUpperBound; - int64_t _lowerBound; - int64_t _upperBound; - - public: - TimestampColumnStatisticsImpl() { reset(); } - TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~TimestampColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int64_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int64_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - void update(int64_t value) { - _stats.updateMinMax(value); - } - - void merge(const MutableColumnStatistics& other) override { - const TimestampColumnStatisticsImpl& tsStats = - dynamic_cast<const TimestampColumnStatisticsImpl&>(other); - _stats.merge(tsStats._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::TimestampStatistics* tsStats = - pbStats.mutable_timestampstatistics(); - if (_stats.hasMinimum()) { - 
tsStats->set_minimumutc(_stats.getMinimum()); - tsStats->set_maximumutc(_stats.getMaximum()); - } else { - tsStats->clear_minimumutc(); - tsStats->clear_maximumutc(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - struct tm tmValue; - char timeBuffer[20]; - time_t secs = 0; - - buffer << "Data type: Timestamp" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - secs = static_cast<time_t>(getMinimum() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "Minimum: " << timeBuffer << "." - << (getMinimum() % 1000) << std::endl; - }else{ - buffer << "Minimum is not defined" << std::endl; - } - - if(hasLowerBound()){ - secs = static_cast<time_t>(getLowerBound() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "LowerBound: " << timeBuffer << "." - << (getLowerBound() % 1000) << std::endl; - }else{ - buffer << "LowerBound is not defined" << std::endl; - } - - if(hasMaximum()){ - secs = static_cast<time_t>(getMaximum()/1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "Maximum: " << timeBuffer << "." - << (getMaximum() % 1000) << std::endl; - }else{ - buffer << "Maximum is not defined" << std::endl; - } - - if(hasUpperBound()){ - secs = static_cast<time_t>(getUpperBound() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "UpperBound: " << timeBuffer << "." - << (getUpperBound() % 1000) << std::endl; - }else{ - buffer << "UpperBound is not defined" << std::endl; - } - - return buffer.str(); - } - - bool hasLowerBound() const override { - return _hasLowerBound; - } - - bool hasUpperBound() const override { - return _hasUpperBound; - } - - int64_t getLowerBound() const override { - if(hasLowerBound()){ - return _lowerBound; - }else{ - throw ParseError("LowerBound is not defined."); - } - } - - int64_t getUpperBound() const override { - if(hasUpperBound()){ - return _upperBound; - }else{ - throw ParseError("UpperBound is not defined."); - } - } - }; - - ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, - const StatContext& statContext); - - class StatisticsImpl: public Statistics { - private: - std::vector<ColumnStatistics*> colStats; - - // DELIBERATELY NOT IMPLEMENTED - StatisticsImpl(const StatisticsImpl&); - StatisticsImpl& operator=(const StatisticsImpl&); - - public: - StatisticsImpl(const proto::StripeStatistics& stripeStats, - const StatContext& statContext); - - StatisticsImpl(const proto::Footer& footer, const StatContext& statContext); - - virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId - ) const override { - return colStats[columnId]; - } - - virtual ~StatisticsImpl() override; - - uint32_t getNumberOfColumns() const override { - return static_cast<uint32_t>(colStats.size()); - } - }; - - class StripeStatisticsImpl: public StripeStatistics { - private: - std::unique_ptr<StatisticsImpl> columnStats; - std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > - rowIndexStats; - - // DELIBERATELY NOT IMPLEMENTED - StripeStatisticsImpl(const StripeStatisticsImpl&); - StripeStatisticsImpl& operator=(const StripeStatisticsImpl&); - - public: - StripeStatisticsImpl( - const proto::StripeStatistics& stripeStats, - 
std::vector<std::vector<proto::ColumnStatistics> >& indexStats, - const StatContext& statContext); - - virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId - ) const override { - return columnStats->getColumnStatistics(columnId); - } - - uint32_t getNumberOfColumns() const override { - return columnStats->getNumberOfColumns(); - } - - virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, - uint32_t rowIndex - ) const override { - // check id indices are valid - return rowIndexStats[columnId][rowIndex].get(); - } - - virtual ~StripeStatisticsImpl() override; - - uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { - return static_cast<uint32_t>(rowIndexStats[columnId].size()); - } - }; - - /** - * Create ColumnStatistics for writers - * @param type of column - * @return MutableColumnStatistics instances - */ - std::unique_ptr<MutableColumnStatistics> createColumnStatistics( - const Type& type); - -}// namespace - -#endif + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(std::string minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(std::string maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + uint64_t getTotalLength() const override { + if(hasTotalLength()){ + return _stats.getTotalLength(); + }else{ + throw ParseError("Total length is not defined."); + } + } + + void setTotalLength(uint64_t length) { + _stats.setHasTotalLength(true); + _stats.setTotalLength(length); + } + + void update(const char* value, size_t length) { + if (value != nullptr) { + if (!_stats.hasMinimum()) { + std::string tempStr(value, value + length); + setMinimum(tempStr); + setMaximum(tempStr); + } else { + // update min + int minCmp = strncmp(_stats.getMinimum().c_str(), + value, + std::min(_stats.getMinimum().length(), length)); + if (minCmp > 0 || + (minCmp == 0 && length < _stats.getMinimum().length())) { + setMinimum(std::string(value, value + length)); + } + + // update max + int maxCmp = strncmp(_stats.getMaximum().c_str(), + value, + std::min(_stats.getMaximum().length(), length)); + if (maxCmp < 0 || + (maxCmp == 0 && length > _stats.getMaximum().length())) { + setMaximum(std::string(value, value + length)); + } + } + } + + _stats.setTotalLength(_stats.getTotalLength() + length); + } + + void update(std::string value) { + update(value.c_str(), value.length()); + } + + void merge(const MutableColumnStatistics& other) override { + const StringColumnStatisticsImpl& strStats = + dynamic_cast<const StringColumnStatisticsImpl&>(other); + _stats.merge(strStats._stats); + } + + void reset() override { + _stats.reset(); + setTotalLength(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); + if (_stats.hasMinimum()) { + strStats->set_minimum(TString(_stats.getMinimum())); + strStats->set_maximum(TString(_stats.getMaximum())); + } else { + strStats->clear_minimum(); + strStats->clear_maximum(); + } + if (_stats.hasTotalLength()) { + strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); + } else { + strStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: String" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has 
null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum is not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum is not defined" << std::endl; + } + + if(hasTotalLength()){ + buffer << "Total length: " << getTotalLength() << std::endl; + }else{ + buffer << "Total length is not defined" << std::endl; + } + return buffer.str(); + } + }; + + class TimestampColumnStatisticsImpl: public TimestampColumnStatistics, + public MutableColumnStatistics { + private: + InternalIntegerStatistics _stats; + bool _hasLowerBound; + bool _hasUpperBound; + int64_t _lowerBound; + int64_t _upperBound; + + public: + TimestampColumnStatisticsImpl() { reset(); } + TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~TimestampColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int64_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int64_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int64_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int64_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + void update(int64_t value) { + _stats.updateMinMax(value); + } + + void merge(const MutableColumnStatistics& other) override { + const TimestampColumnStatisticsImpl& tsStats = + dynamic_cast<const TimestampColumnStatisticsImpl&>(other); + _stats.merge(tsStats._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::TimestampStatistics* tsStats = + pbStats.mutable_timestampstatistics(); + if (_stats.hasMinimum()) { + tsStats->set_minimumutc(_stats.getMinimum()); + tsStats->set_maximumutc(_stats.getMaximum()); + } else { + tsStats->clear_minimumutc(); + tsStats->clear_maximumutc(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + struct tm tmValue; + char timeBuffer[20]; + time_t secs = 0; + + buffer << "Data type: Timestamp" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + secs = static_cast<time_t>(getMinimum() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "Minimum: " << timeBuffer << "." 
+ << (getMinimum() % 1000) << std::endl; + }else{ + buffer << "Minimum is not defined" << std::endl; + } + + if(hasLowerBound()){ + secs = static_cast<time_t>(getLowerBound() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "LowerBound: " << timeBuffer << "." + << (getLowerBound() % 1000) << std::endl; + }else{ + buffer << "LowerBound is not defined" << std::endl; + } + + if(hasMaximum()){ + secs = static_cast<time_t>(getMaximum()/1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "Maximum: " << timeBuffer << "." + << (getMaximum() % 1000) << std::endl; + }else{ + buffer << "Maximum is not defined" << std::endl; + } + + if(hasUpperBound()){ + secs = static_cast<time_t>(getUpperBound() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "UpperBound: " << timeBuffer << "." + << (getUpperBound() % 1000) << std::endl; + }else{ + buffer << "UpperBound is not defined" << std::endl; + } + + return buffer.str(); + } + + bool hasLowerBound() const override { + return _hasLowerBound; + } + + bool hasUpperBound() const override { + return _hasUpperBound; + } + + int64_t getLowerBound() const override { + if(hasLowerBound()){ + return _lowerBound; + }else{ + throw ParseError("LowerBound is not defined."); + } + } + + int64_t getUpperBound() const override { + if(hasUpperBound()){ + return _upperBound; + }else{ + throw ParseError("UpperBound is not defined."); + } + } + }; + + ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, + const StatContext& statContext); + + class StatisticsImpl: public Statistics { + private: + std::vector<ColumnStatistics*> colStats; + + // DELIBERATELY NOT IMPLEMENTED + StatisticsImpl(const StatisticsImpl&); + StatisticsImpl& operator=(const StatisticsImpl&); + + public: + StatisticsImpl(const proto::StripeStatistics& stripeStats, + const StatContext& statContext); + + StatisticsImpl(const proto::Footer& footer, const StatContext& statContext); + + virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId + ) const override { + return colStats[columnId]; + } + + virtual ~StatisticsImpl() override; + + uint32_t getNumberOfColumns() const override { + return static_cast<uint32_t>(colStats.size()); + } + }; + + class StripeStatisticsImpl: public StripeStatistics { + private: + std::unique_ptr<StatisticsImpl> columnStats; + std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > + rowIndexStats; + + // DELIBERATELY NOT IMPLEMENTED + StripeStatisticsImpl(const StripeStatisticsImpl&); + StripeStatisticsImpl& operator=(const StripeStatisticsImpl&); + + public: + StripeStatisticsImpl( + const proto::StripeStatistics& stripeStats, + std::vector<std::vector<proto::ColumnStatistics> >& indexStats, + const StatContext& statContext); + + virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId + ) const override { + return columnStats->getColumnStatistics(columnId); + } + + uint32_t getNumberOfColumns() const override { + return columnStats->getNumberOfColumns(); + } + + virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, + uint32_t rowIndex + ) const override { + // check id indices are valid + return rowIndexStats[columnId][rowIndex].get(); + } + + virtual ~StripeStatisticsImpl() override; + + uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { + return 
static_cast<uint32_t>(rowIndexStats[columnId].size()); + } + }; + + /** + * Create ColumnStatistics for writers + * @param type of column + * @return MutableColumnStatistics instances + */ + std::unique_ptr<MutableColumnStatistics> createColumnStatistics( + const Type& type); + +}// namespace + +#endif diff --git a/contrib/libs/apache/orc/c++/src/StripeStream.cc b/contrib/libs/apache/orc/c++/src/StripeStream.cc index f9d82f30e0..b63f19d28e 100644 --- a/contrib/libs/apache/orc/c++/src/StripeStream.cc +++ b/contrib/libs/apache/orc/c++/src/StripeStream.cc @@ -1,161 +1,161 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Exceptions.hh" -#include "RLE.hh" -#include "Reader.hh" -#include "StripeStream.hh" - -#include "wrap/coded-stream-wrapper.h" - -namespace orc { - - StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader, uint64_t _index, - const proto::StripeInformation& _stripeInfo, - const proto::StripeFooter& _footer, - uint64_t _stripeStart, - InputStream& _input, - const Timezone& _writerTimezone - ): reader(_reader), - stripeInfo(_stripeInfo), - footer(_footer), - stripeIndex(_index), - stripeStart(_stripeStart), - input(_input), - writerTimezone(_writerTimezone) { - // PASS - } - - StripeStreamsImpl::~StripeStreamsImpl() { - // PASS - } - - StreamInformation::~StreamInformation() { - // PASS - } - - StripeInformation::~StripeInformation() { - // PASS - } - - - StreamInformationImpl::~StreamInformationImpl() { - // PASS - } - - const std::vector<bool> StripeStreamsImpl::getSelectedColumns() const { - return reader.getSelectedColumns(); - } - - proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId - ) const { - return footer.columns(static_cast<int>(columnId)); - } - - const Timezone& StripeStreamsImpl::getWriterTimezone() const { - return writerTimezone; - } - - std::ostream* StripeStreamsImpl::getErrorStream() const { - return reader.getFileContents().errorStream; - } - - std::unique_ptr<SeekableInputStream> - StripeStreamsImpl::getStream(uint64_t columnId, - proto::Stream_Kind kind, - bool shouldStream) const { - uint64_t offset = stripeStart; - uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() + stripeInfo.datalength(); - MemoryPool *pool = reader.getFileContents().pool; - for(int i = 0; i < footer.streams_size(); ++i) { - const proto::Stream& stream = footer.streams(i); - if (stream.has_kind() && - stream.kind() == kind && - stream.column() == static_cast<uint64_t>(columnId)) { - uint64_t streamLength = stream.length(); - uint64_t myBlock = shouldStream ? 
input.getNaturalReadSize(): streamLength; - if (offset + streamLength > dataEnd) { - std::stringstream msg; - msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex - << ": streamOffset=" << offset << ", streamLength=" << streamLength - << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength=" - << stripeInfo.indexlength() << ", stripeDataLength=" << stripeInfo.datalength(); - throw ParseError(msg.str()); - } - return createDecompressor(reader.getCompression(), - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream - (&input, - offset, - stream.length(), - *pool, - myBlock)), - reader.getCompressionSize(), - *pool); - } - offset += stream.length(); - } - return std::unique_ptr<SeekableInputStream>(); - } - - MemoryPool& StripeStreamsImpl::getMemoryPool() const { - return *reader.getFileContents().pool; - } - - bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const { - return reader.getThrowOnHive11DecimalOverflow(); - } - - int32_t StripeStreamsImpl::getForcedScaleOnHive11Decimal() const { - return reader.getForcedScaleOnHive11Decimal(); - } - - void StripeInformationImpl::ensureStripeFooterLoaded() const { - if (stripeFooter.get() == nullptr) { - std::unique_ptr<SeekableInputStream> pbStream = - createDecompressor(compression, - std::unique_ptr<SeekableInputStream> - (new SeekableFileInputStream(stream, - offset + - indexLength + - dataLength, - footerLength, - memory)), - blockSize, - memory); - stripeFooter.reset(new proto::StripeFooter()); - if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { - throw ParseError("Failed to parse the stripe footer"); - } - } - } - - std::unique_ptr<StreamInformation> - StripeInformationImpl::getStreamInformation(uint64_t streamId) const { - ensureStripeFooterLoaded(); - uint64_t streamOffset = offset; - for(uint64_t s=0; s < streamId; ++s) { - streamOffset += stripeFooter->streams(static_cast<int>(s)).length(); - } - return ORC_UNIQUE_PTR<StreamInformation> - (new StreamInformationImpl(streamOffset, - stripeFooter-> - streams(static_cast<int>(streamId)))); - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Exceptions.hh" +#include "RLE.hh" +#include "Reader.hh" +#include "StripeStream.hh" + +#include "wrap/coded-stream-wrapper.h" + +namespace orc { + + StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader, uint64_t _index, + const proto::StripeInformation& _stripeInfo, + const proto::StripeFooter& _footer, + uint64_t _stripeStart, + InputStream& _input, + const Timezone& _writerTimezone + ): reader(_reader), + stripeInfo(_stripeInfo), + footer(_footer), + stripeIndex(_index), + stripeStart(_stripeStart), + input(_input), + writerTimezone(_writerTimezone) { + // PASS + } + + StripeStreamsImpl::~StripeStreamsImpl() { + // PASS + } + + StreamInformation::~StreamInformation() { + // PASS + } + + StripeInformation::~StripeInformation() { + // PASS + } + + + StreamInformationImpl::~StreamInformationImpl() { + // PASS + } + + const std::vector<bool> StripeStreamsImpl::getSelectedColumns() const { + return reader.getSelectedColumns(); + } + + proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId + ) const { + return footer.columns(static_cast<int>(columnId)); + } + + const Timezone& StripeStreamsImpl::getWriterTimezone() const { + return writerTimezone; + } + + std::ostream* StripeStreamsImpl::getErrorStream() const { + return reader.getFileContents().errorStream; + } + + std::unique_ptr<SeekableInputStream> + StripeStreamsImpl::getStream(uint64_t columnId, + proto::Stream_Kind kind, + bool shouldStream) const { + uint64_t offset = stripeStart; + uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() + stripeInfo.datalength(); + MemoryPool *pool = reader.getFileContents().pool; + for(int i = 0; i < footer.streams_size(); ++i) { + const proto::Stream& stream = footer.streams(i); + if (stream.has_kind() && + stream.kind() == kind && + stream.column() == static_cast<uint64_t>(columnId)) { + uint64_t streamLength = stream.length(); + uint64_t myBlock = shouldStream ? 
input.getNaturalReadSize(): streamLength; + if (offset + streamLength > dataEnd) { + std::stringstream msg; + msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex + << ": streamOffset=" << offset << ", streamLength=" << streamLength + << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength=" + << stripeInfo.indexlength() << ", stripeDataLength=" << stripeInfo.datalength(); + throw ParseError(msg.str()); + } + return createDecompressor(reader.getCompression(), + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream + (&input, + offset, + stream.length(), + *pool, + myBlock)), + reader.getCompressionSize(), + *pool); + } + offset += stream.length(); + } + return std::unique_ptr<SeekableInputStream>(); + } + + MemoryPool& StripeStreamsImpl::getMemoryPool() const { + return *reader.getFileContents().pool; + } + + bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const { + return reader.getThrowOnHive11DecimalOverflow(); + } + + int32_t StripeStreamsImpl::getForcedScaleOnHive11Decimal() const { + return reader.getForcedScaleOnHive11Decimal(); + } + + void StripeInformationImpl::ensureStripeFooterLoaded() const { + if (stripeFooter.get() == nullptr) { + std::unique_ptr<SeekableInputStream> pbStream = + createDecompressor(compression, + std::unique_ptr<SeekableInputStream> + (new SeekableFileInputStream(stream, + offset + + indexLength + + dataLength, + footerLength, + memory)), + blockSize, + memory); + stripeFooter.reset(new proto::StripeFooter()); + if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError("Failed to parse the stripe footer"); + } + } + } + + std::unique_ptr<StreamInformation> + StripeInformationImpl::getStreamInformation(uint64_t streamId) const { + ensureStripeFooterLoaded(); + uint64_t streamOffset = offset; + for(uint64_t s=0; s < streamId; ++s) { + streamOffset += stripeFooter->streams(static_cast<int>(s)).length(); + } + return ORC_UNIQUE_PTR<StreamInformation> + (new StreamInformationImpl(streamOffset, + stripeFooter-> + streams(static_cast<int>(streamId)))); + } + +} diff --git a/contrib/libs/apache/orc/c++/src/StripeStream.hh b/contrib/libs/apache/orc/c++/src/StripeStream.hh index da5cb16f37..5cbaf60a69 100644 --- a/contrib/libs/apache/orc/c++/src/StripeStream.hh +++ b/contrib/libs/apache/orc/c++/src/StripeStream.hh @@ -1,213 +1,213 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_STRIPE_STREAM_HH -#define ORC_STRIPE_STREAM_HH - -#include "orc/Int128.hh" -#include "orc/OrcFile.hh" -#include "orc/Reader.hh" - -#include "Timezone.hh" -#include "TypeImpl.hh" - -namespace orc { - - class RowReaderImpl; - - /** - * StripeStream Implementation - */ - - class StripeStreamsImpl: public StripeStreams { - private: - const RowReaderImpl& reader; - const proto::StripeInformation& stripeInfo; - const proto::StripeFooter& footer; - const uint64_t stripeIndex; - const uint64_t stripeStart; - InputStream& input; - const Timezone& writerTimezone; - - public: - StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index, - const proto::StripeInformation& stripeInfo, - const proto::StripeFooter& footer, - uint64_t stripeStart, - InputStream& input, - const Timezone& writerTimezone); - - virtual ~StripeStreamsImpl() override; - - virtual const std::vector<bool> getSelectedColumns() const override; - - virtual proto::ColumnEncoding getEncoding(uint64_t columnId - ) const override; - - virtual std::unique_ptr<SeekableInputStream> - getStream(uint64_t columnId, - proto::Stream_Kind kind, - bool shouldStream) const override; - - MemoryPool& getMemoryPool() const override; - - const Timezone& getWriterTimezone() const override; - - std::ostream* getErrorStream() const override; - - bool getThrowOnHive11DecimalOverflow() const override; - - int32_t getForcedScaleOnHive11Decimal() const override; - }; - - /** - * StreamInformation Implementation - */ - - class StreamInformationImpl: public StreamInformation { - private: - StreamKind kind; - uint64_t column; - uint64_t offset; - uint64_t length; - public: - StreamInformationImpl(uint64_t _offset, - const proto::Stream& stream - ): kind(static_cast<StreamKind>(stream.kind())), - column(stream.column()), - offset(_offset), - length(stream.length()) { - // PASS - } - - ~StreamInformationImpl() override; - - StreamKind getKind() const override { - return kind; - } - - uint64_t getColumnId() const override { - return column; - } - - uint64_t getOffset() const override { - return offset; - } - - uint64_t getLength() const override { - return length; - } - }; - - /** - * StripeInformation Implementation - */ - - class StripeInformationImpl : public StripeInformation { - uint64_t offset; - uint64_t indexLength; - uint64_t dataLength; - uint64_t footerLength; - uint64_t numRows; - InputStream* stream; - MemoryPool& memory; - CompressionKind compression; - uint64_t blockSize; - mutable std::unique_ptr<proto::StripeFooter> stripeFooter; - void ensureStripeFooterLoaded() const; - public: - - StripeInformationImpl(uint64_t _offset, - uint64_t _indexLength, - uint64_t _dataLength, - uint64_t _footerLength, - uint64_t _numRows, - InputStream* _stream, - MemoryPool& _memory, - CompressionKind _compression, - uint64_t _blockSize - ) : offset(_offset), - indexLength(_indexLength), - dataLength(_dataLength), - footerLength(_footerLength), - numRows(_numRows), - stream(_stream), - memory(_memory), - compression(_compression), - blockSize(_blockSize) { - // PASS - } - - virtual ~StripeInformationImpl() override { - // PASS - } - - uint64_t getOffset() const override { - return offset; - } - - uint64_t getLength() const override { - return indexLength + dataLength + footerLength; - } - uint64_t getIndexLength() const override { - return indexLength; - } - - uint64_t getDataLength()const override { - return dataLength; - } - - uint64_t getFooterLength() const override { - return footerLength; - } - - uint64_t getNumberOfRows() const override { 
- return numRows; - } - - uint64_t getNumberOfStreams() const override { - ensureStripeFooterLoaded(); - return static_cast<uint64_t>(stripeFooter->streams_size()); - } - - std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId - ) const override; - - ColumnEncodingKind getColumnEncoding(uint64_t colId) const override { - ensureStripeFooterLoaded(); - return static_cast<ColumnEncodingKind>(stripeFooter-> - columns(static_cast<int>(colId)) - .kind()); - } - - uint64_t getDictionarySize(uint64_t colId) const override { - ensureStripeFooterLoaded(); - return static_cast<ColumnEncodingKind>(stripeFooter-> - columns(static_cast<int>(colId)) - .dictionarysize()); - } - - const std::string& getWriterTimezone() const override { - ensureStripeFooterLoaded(); - return stripeFooter->writertimezone(); - } - }; - -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_STRIPE_STREAM_HH +#define ORC_STRIPE_STREAM_HH + +#include "orc/Int128.hh" +#include "orc/OrcFile.hh" +#include "orc/Reader.hh" + +#include "Timezone.hh" +#include "TypeImpl.hh" + +namespace orc { + + class RowReaderImpl; + + /** + * StripeStream Implementation + */ + + class StripeStreamsImpl: public StripeStreams { + private: + const RowReaderImpl& reader; + const proto::StripeInformation& stripeInfo; + const proto::StripeFooter& footer; + const uint64_t stripeIndex; + const uint64_t stripeStart; + InputStream& input; + const Timezone& writerTimezone; + + public: + StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index, + const proto::StripeInformation& stripeInfo, + const proto::StripeFooter& footer, + uint64_t stripeStart, + InputStream& input, + const Timezone& writerTimezone); + + virtual ~StripeStreamsImpl() override; + + virtual const std::vector<bool> getSelectedColumns() const override; + + virtual proto::ColumnEncoding getEncoding(uint64_t columnId + ) const override; + + virtual std::unique_ptr<SeekableInputStream> + getStream(uint64_t columnId, + proto::Stream_Kind kind, + bool shouldStream) const override; + + MemoryPool& getMemoryPool() const override; + + const Timezone& getWriterTimezone() const override; + + std::ostream* getErrorStream() const override; + + bool getThrowOnHive11DecimalOverflow() const override; + + int32_t getForcedScaleOnHive11Decimal() const override; + }; + + /** + * StreamInformation Implementation + */ + + class StreamInformationImpl: public StreamInformation { + private: + StreamKind kind; + uint64_t column; + uint64_t offset; + uint64_t length; + public: + StreamInformationImpl(uint64_t _offset, + const proto::Stream& stream + ): kind(static_cast<StreamKind>(stream.kind())), + column(stream.column()), + offset(_offset), + length(stream.length()) { + // PASS + } + + ~StreamInformationImpl() 
override; + + StreamKind getKind() const override { + return kind; + } + + uint64_t getColumnId() const override { + return column; + } + + uint64_t getOffset() const override { + return offset; + } + + uint64_t getLength() const override { + return length; + } + }; + + /** + * StripeInformation Implementation + */ + + class StripeInformationImpl : public StripeInformation { + uint64_t offset; + uint64_t indexLength; + uint64_t dataLength; + uint64_t footerLength; + uint64_t numRows; + InputStream* stream; + MemoryPool& memory; + CompressionKind compression; + uint64_t blockSize; + mutable std::unique_ptr<proto::StripeFooter> stripeFooter; + void ensureStripeFooterLoaded() const; + public: + + StripeInformationImpl(uint64_t _offset, + uint64_t _indexLength, + uint64_t _dataLength, + uint64_t _footerLength, + uint64_t _numRows, + InputStream* _stream, + MemoryPool& _memory, + CompressionKind _compression, + uint64_t _blockSize + ) : offset(_offset), + indexLength(_indexLength), + dataLength(_dataLength), + footerLength(_footerLength), + numRows(_numRows), + stream(_stream), + memory(_memory), + compression(_compression), + blockSize(_blockSize) { + // PASS + } + + virtual ~StripeInformationImpl() override { + // PASS + } + + uint64_t getOffset() const override { + return offset; + } + + uint64_t getLength() const override { + return indexLength + dataLength + footerLength; + } + uint64_t getIndexLength() const override { + return indexLength; + } + + uint64_t getDataLength()const override { + return dataLength; + } + + uint64_t getFooterLength() const override { + return footerLength; + } + + uint64_t getNumberOfRows() const override { + return numRows; + } + + uint64_t getNumberOfStreams() const override { + ensureStripeFooterLoaded(); + return static_cast<uint64_t>(stripeFooter->streams_size()); + } + + std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId + ) const override; + + ColumnEncodingKind getColumnEncoding(uint64_t colId) const override { + ensureStripeFooterLoaded(); + return static_cast<ColumnEncodingKind>(stripeFooter-> + columns(static_cast<int>(colId)) + .kind()); + } + + uint64_t getDictionarySize(uint64_t colId) const override { + ensureStripeFooterLoaded(); + return static_cast<ColumnEncodingKind>(stripeFooter-> + columns(static_cast<int>(colId)) + .dictionarysize()); + } + + const std::string& getWriterTimezone() const override { + ensureStripeFooterLoaded(); + return stripeFooter->writertimezone(); + } + }; + +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/Timezone.cc b/contrib/libs/apache/orc/c++/src/Timezone.cc index 0aa66ef71c..318e5bcc12 100644 --- a/contrib/libs/apache/orc/c++/src/Timezone.cc +++ b/contrib/libs/apache/orc/c++/src/Timezone.cc @@ -1,936 +1,936 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/OrcFile.hh" -#include "Timezone.hh" - -#include <errno.h> -#include <map> -#include <sstream> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> - -namespace orc { - - // default location of the timezone files - static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo"; - - // location of a symlink to the local timezone - static const char LOCAL_TIMEZONE[] = "/etc/localtime"; - - enum TransitionKind { - TRANSITION_JULIAN, - TRANSITION_DAY, - TRANSITION_MONTH - }; - - static const int64_t MONTHS_PER_YEAR = 12; - /** - * The number of days in each month in non-leap and leap years. - */ - static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] = - {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - static const int64_t DAYS_PER_WEEK = 7; - - // Leap years and day of the week repeat every 400 years, which makes it - // a good cycle length. - static const int64_t SECONDS_PER_400_YEARS = - SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3)); - - /** - * Is the given year a leap year? - */ - bool isLeap(int64_t year) { - return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); - } - - /** - * Find the position that is the closest and less than or equal to the - * target. - * @return -1 if the target < array[0] or array is empty or - * i if array[i] <= target and (i == n or array[i] < array[i+1]) - */ - int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) { - uint64_t size = array.size(); - if (size == 0) { - return -1; - } - uint64_t min = 0; - uint64_t max = size - 1; - uint64_t mid = (min + max) / 2; - while ((array[mid] != target) && (min < max)) { - if (array[mid] < target) { - min = mid + 1; - } else if (mid == 0) { - max = 0; - } else { - max = mid - 1; - } - mid = (min + max) / 2; - } - if (target < array[mid]) { - return static_cast<int64_t>(mid) - 1; - } else { - return static_cast<int64_t>(mid); - } - } - - struct Transition { - TransitionKind kind; - int64_t day; - int64_t week; - int64_t month; - int64_t time; - - std::string toString() const { - std::stringstream buffer; - switch (kind) { - case TRANSITION_JULIAN: - buffer << "julian " << day; - break; - case TRANSITION_DAY: - buffer << "day " << day; - break; - case TRANSITION_MONTH: - buffer << "month " << month << " week " << week << " day " << day; - break; - } - buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60) - << ":" << (time % 60); - return buffer.str(); - } - - /** - * Get the transition time for the given year. - * @param year the year - * @return the number of seconds past local Jan 1 00:00:00 that the - * transition happens. - */ - int64_t getTime(int64_t year) const { - int64_t result = time; - switch (kind) { - case TRANSITION_JULIAN: - result += SECONDS_PER_DAY * day; - if (day > 60 && isLeap(year)) { - result += SECONDS_PER_DAY; - } - break; - case TRANSITION_DAY: - result += SECONDS_PER_DAY * day; - break; - case TRANSITION_MONTH: { - bool inLeap = isLeap(year); - int64_t adjustedMonth = (month + 9) % 12 + 1; - int64_t adjustedYear = (month <= 2) ? 
(year - 1) : year; - int64_t adjustedCentury = adjustedYear / 100; - int64_t adjustedRemainder = adjustedYear % 100; - - // day of the week of the first day of month - int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 + - 1 + adjustedRemainder + adjustedRemainder / 4 + - adjustedCentury / 4 - 2 * adjustedCentury) % 7; - if (dayOfWeek < 0) { - dayOfWeek += DAYS_PER_WEEK; - } - - int64_t d = day - dayOfWeek; - if (d < 0) { - d += DAYS_PER_WEEK; - } - for (int w = 1; w < week; ++w) { - if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) { - break; - } - d += DAYS_PER_WEEK; - } - result += d * SECONDS_PER_DAY; - - // Add in the time for the month - for(int m=0; m < month - 1; ++m) { - result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY; - } - break; - } - } - return result; - } - }; - - /** - * The current rule for finding timezone variants arbitrarily far in - * the future. They are based on a string representation that - * specifies the standard name and offset. For timezones with - * daylight savings, the string specifies the daylight variant name - * and offset and the rules for switching between them. - * - * rule = <standard name><standard offset><daylight>? - * name = string with no numbers or '+', '-', or ',' - * offset = [-+]?hh(:mm(:ss)?)? - * daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)? - * day = J<day without 2/29>|<day with 2/29>|M<month>.<week>.<day of week> - */ - class FutureRuleImpl: public FutureRule { - std::string ruleString; - TimezoneVariant standard; - bool hasDst; - TimezoneVariant dst; - Transition start; - Transition end; - - // expanded time_t offsets of transitions - std::vector<int64_t> offsets; - - // Is the epoch (1 Jan 1970 00:00) in standard time? - // This code assumes that the transition dates fall in the same order - // each year. Hopefully no timezone regions decide to move across the - // equator, which is about what it would take. - bool startInStd; - - void computeOffsets() { - if (!hasDst) { - startInStd = true; - offsets.resize(1); - } else { - // Insert a transition for the epoch and two per a year for the next - // 400 years. We assume that the all even positions are in standard - // time if and only if startInStd and the odd ones are the reverse. - offsets.resize(400 * 2 + 1); - startInStd = start.getTime(1970) < end.getTime(1970); - int64_t base = 0; - for(int64_t year = 1970; year < 1970 + 400; ++year) { - if (startInStd) { - offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = - base + start.getTime(year) - standard.gmtOffset; - offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = - base + end.getTime(year) - dst.gmtOffset; - } else { - offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = - base + end.getTime(year) - dst.gmtOffset; - offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = - base + start.getTime(year) - standard.gmtOffset; - } - base += (isLeap(year) ? 
366 : 365) * SECONDS_PER_DAY; - } - } - offsets[0] = 0; - } - - public: - virtual ~FutureRuleImpl() override; - bool isDefined() const override; - const TimezoneVariant& getVariant(int64_t clk) const override; - void print(std::ostream& out) const override; - - friend class FutureRuleParser; - }; - - FutureRule::~FutureRule() { - // PASS - } - - FutureRuleImpl::~FutureRuleImpl() { - // PASS - } - - bool FutureRuleImpl::isDefined() const { - return ruleString.size() > 0; - } - - const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const { - if (!hasDst) { - return standard; - } else { - int64_t adjusted = clk % SECONDS_PER_400_YEARS; - if (adjusted < 0) { - adjusted += SECONDS_PER_400_YEARS; - } - int64_t idx = binarySearch(offsets, adjusted); - if (startInStd == (idx % 2 == 0)) { - return standard; - } else { - return dst; - } - } - } - - void FutureRuleImpl::print(std::ostream& out) const { - if (isDefined()) { - out << " Future rule: " << ruleString << "\n"; - out << " standard " << standard.toString() << "\n"; - if (hasDst) { - out << " dst " << dst.toString() << "\n"; - out << " start " << start.toString() << "\n"; - out << " end " << end.toString() << "\n"; - } - } - } - - /** - * A parser for the future rule strings. - */ - class FutureRuleParser { - public: - FutureRuleParser(const std::string& str, - FutureRuleImpl* rule - ): ruleString(str), - length(str.size()), - position(0), - output(*rule) { - output.ruleString = str; - if (position != length) { - parseName(output.standard.name); - output.standard.gmtOffset = -parseOffset(); - output.standard.isDst = false; - output.hasDst = position < length; - if (output.hasDst) { - parseName(output.dst.name); - output.dst.isDst = true; - if (ruleString[position] != ',') { - output.dst.gmtOffset = -parseOffset(); - } else { - output.dst.gmtOffset = output.standard.gmtOffset + 60 * 60; - } - parseTransition(output.start); - parseTransition(output.end); - } - if (position != length) { - throwError("Extra text"); - } - output.computeOffsets(); - } - } - - private: - - const std::string& ruleString; - size_t length; - size_t position; - FutureRuleImpl &output; - - void throwError(const char *msg) { - std::stringstream buffer; - buffer << msg << " at " << position << " in '" << ruleString << "'"; - throw TimezoneError(buffer.str()); - } - - /** - * Parse the names of the form: - * ([^-+0-9,]+|<[^>]+>) - * and set the output string. - */ - void parseName(std::string& result) { - if (position == length) { - throwError("name required"); - } - size_t start = position; - if (ruleString[position] == '<') { - while (position < length && ruleString[position] != '>') { - position += 1; - } - if (position == length) { - throwError("missing close '>'"); - } - position +=1; - } else { - while (position < length) { - char ch = ruleString[position]; - if (isdigit(ch) || ch == '-' || ch == '+' || ch == ',') { - break; - } - position += 1; - } - } - if (position == start) { - throwError("empty string not allowed"); - } - result = ruleString.substr(start, position - start); - } - - /** - * Parse an integer of the form [0-9]+ and return it. - */ - int64_t parseNumber() { - if (position >= length) { - throwError("missing number"); - } - int64_t result = 0; - while (position < length) { - char ch = ruleString[position]; - if (isdigit(ch)) { - result = result * 10 + (ch - '0'); - position += 1; - } else { - break; - } - } - return result; - } - - /** - * Parse the offsets of the form: - * [-+]?[0-9]+(:[0-9]+(:[0-9]+)?)? 
- * and convert it into a number of seconds. - */ - int64_t parseOffset() { - int64_t scale = 3600; - bool isNegative = false; - if (position < length) { - char ch = ruleString[position]; - isNegative = ch == '-'; - if (ch == '-' || ch == '+') { - position += 1; - } - } - int64_t result = parseNumber() * scale; - while (position < length && scale > 1 && ruleString[position] == ':') { - scale /= 60; - position += 1; - result += parseNumber() * scale; - } - if (isNegative) { - result = -result; - } - return result; - } - - /** - * Parse a transition of the following form: - * ,(J<number>|<number>|M<number>.<number>.<number>)(/<offset>)? - */ - void parseTransition(Transition& transition) { - if (length - position < 2 || ruleString[position] != ',') { - throwError("missing transition"); - } - position += 1; - char ch = ruleString[position]; - if (ch == 'J') { - transition.kind = TRANSITION_JULIAN; - position += 1; - transition.day = parseNumber(); - } else if (ch == 'M') { - transition.kind = TRANSITION_MONTH; - position += 1; - transition.month = parseNumber(); - if (position == length || ruleString[position] != '.') { - throwError("missing first ."); - } - position += 1; - transition.week = parseNumber(); - if (position == length || ruleString[position] != '.') { - throwError("missing second ."); - } - position += 1; - transition.day = parseNumber(); - } else { - transition.kind = TRANSITION_DAY; - transition.day = parseNumber(); - } - if (position < length && ruleString[position] == '/') { - position += 1; - transition.time = parseOffset(); - } else { - transition.time = 2 * 60 * 60; - } - } - }; - - /** - * Parse the POSIX TZ string. - */ - std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) { - std::shared_ptr<FutureRule> result(new FutureRuleImpl()); - FutureRuleParser parser(ruleString, - dynamic_cast<FutureRuleImpl*>(result.get())); - return result; - } - - std::string TimezoneVariant::toString() const { - std::stringstream buffer; - buffer << name << " " << gmtOffset; - if (isDst) { - buffer << " (dst)"; - } - return buffer.str(); - } - - /** - * An abstraction of the differences between versions. - */ - class VersionParser { - public: - virtual ~VersionParser(); - - /** - * Get the version number. - */ - virtual uint64_t getVersion() const = 0; - - /** - * Get the number of bytes - */ - virtual uint64_t getTimeSize() const = 0; - - /** - * Parse the time at the given location. - */ - virtual int64_t parseTime(const unsigned char* ptr) const = 0; - - /** - * Parse the future string - */ - virtual std::string parseFutureString(const unsigned char *ptr, - uint64_t offset, - uint64_t length) const = 0; - }; - - VersionParser::~VersionParser() { - // PASS - } - - static uint32_t decode32(const unsigned char* ptr) { - return static_cast<uint32_t>(ptr[0] << 24) | - static_cast<uint32_t>(ptr[1] << 16) | - static_cast<uint32_t>(ptr[2] << 8) | - static_cast<uint32_t>(ptr[3]); - } - - class Version1Parser: public VersionParser { - public: - virtual ~Version1Parser() override; - - virtual uint64_t getVersion() const override { - return 1; - } - - /** - * Get the number of bytes - */ - virtual uint64_t getTimeSize() const override { - return 4; - } - - /** - * Parse the time at the given location. 
- */ - virtual int64_t parseTime(const unsigned char* ptr) const override { - // sign extend from 32 bits - return static_cast<int32_t>(decode32(ptr)); - } - - virtual std::string parseFutureString(const unsigned char *, - uint64_t, - uint64_t) const override { - return ""; - } - }; - - Version1Parser::~Version1Parser() { - // PASS - } - - class Version2Parser: public VersionParser { - public: - virtual ~Version2Parser() override; - - virtual uint64_t getVersion() const override { - return 2; - } - - /** - * Get the number of bytes - */ - virtual uint64_t getTimeSize() const override { - return 8; - } - - /** - * Parse the time at the given location. - */ - virtual int64_t parseTime(const unsigned char* ptr) const override { - return static_cast<int64_t>(decode32(ptr)) << 32 | decode32(ptr + 4); - } - - virtual std::string parseFutureString(const unsigned char *ptr, - uint64_t offset, - uint64_t length) const override { - return std::string(reinterpret_cast<const char*>(ptr) + offset + 1, - length - 2); - } - }; - - Version2Parser::~Version2Parser() { - // PASS - } - - class TimezoneImpl: public Timezone { - public: - TimezoneImpl(const std::string& name, - const std::vector<unsigned char> bytes); - virtual ~TimezoneImpl() override; - - /** - * Get the variant for the given time (time_t). - */ - const TimezoneVariant& getVariant(int64_t clk) const override; - - void print(std::ostream&) const override; - - uint64_t getVersion() const override { - return version; - } - - int64_t getEpoch() const override { - return epoch; - } - - int64_t convertToUTC(int64_t clk) const override { - return clk + getVariant(clk).gmtOffset; - } - - private: - void parseTimeVariants(const unsigned char* ptr, - uint64_t variantOffset, - uint64_t variantCount, - uint64_t nameOffset, - uint64_t nameCount); - void parseZoneFile(const unsigned char* ptr, - uint64_t sectionOffset, - uint64_t fileLength, - const VersionParser& version); - // filename - std::string filename; - - // the version of the file - uint64_t version; - - // the list of variants for this timezone - std::vector<TimezoneVariant> variants; - - // the list of the times where the local rules change - std::vector<int64_t> transitions; - - // the variant that starts at this transition. - std::vector<uint64_t> currentVariant; - - // the variant before the first transition - uint64_t ancientVariant; - - // the rule for future times - std::shared_ptr<FutureRule> futureRule; - - // the last explicit transition after which we use the future rule - int64_t lastTransition; - - // The ORC epoch time in this timezone. 
- int64_t epoch; - }; - - DIAGNOSTIC_PUSH - #ifdef __clang__ - DIAGNOSTIC_IGNORE("-Wglobal-constructors") - DIAGNOSTIC_IGNORE("-Wexit-time-destructors") - #endif - static std::mutex timezone_mutex; - static std::map<std::string, std::shared_ptr<Timezone> > timezoneCache; - DIAGNOSTIC_POP - - Timezone::~Timezone() { - // PASS - } - - TimezoneImpl::TimezoneImpl(const std::string& _filename, - const std::vector<unsigned char> buffer - ): filename(_filename) { - parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser()); - // Build the literal for the ORC epoch - // 2015 Jan 1 00:00:00 - tm epochStruct; - epochStruct.tm_sec = 0; - epochStruct.tm_min = 0; - epochStruct.tm_hour = 0; - epochStruct.tm_mday = 1; - epochStruct.tm_mon = 0; - epochStruct.tm_year = 2015 - 1900; - epochStruct.tm_isdst = 0; - time_t utcEpoch = timegm(&epochStruct); - epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; - } - - const char* getTimezoneDirectory() { - const char *dir = getenv("TZDIR"); - if (!dir) { - dir = DEFAULT_TZDIR; - } - return dir; - } - - /** - * Get a timezone by absolute filename. - * Results are cached. - */ - const Timezone& getTimezoneByFilename(const std::string& filename) { - // ORC-110 - std::lock_guard<std::mutex> timezone_lock(timezone_mutex); - std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = - timezoneCache.find(filename); - if (itr != timezoneCache.end()) { - return *(itr->second).get(); - } - try { - ORC_UNIQUE_PTR<InputStream> file = readFile(filename); - size_t size = static_cast<size_t>(file->getLength()); - std::vector<unsigned char> buffer(size); - file->read(&buffer[0], size, 0); - timezoneCache[filename] = std::shared_ptr<Timezone>(new TimezoneImpl(filename, buffer)); - } catch(ParseError& err) { - throw TimezoneError(err.what()); - } - return *timezoneCache[filename].get(); - } - - /** - * Get the local timezone. - */ - const Timezone& getLocalTimezone() { -#ifdef _MSC_VER - return getTimezoneByName("UTC"); -#else - return getTimezoneByFilename(LOCAL_TIMEZONE); -#endif - } - - /** - * Get a timezone by name (eg. America/Los_Angeles). - * Results are cached. - */ - const Timezone& getTimezoneByName(const std::string& zone) { - std::string filename(getTimezoneDirectory()); - filename += "/"; - filename += zone; - return getTimezoneByFilename(filename); - } - - /** - * Parse a set of bytes as a timezone file as if they came from filename. - */ - std::unique_ptr<Timezone> getTimezone(const std::string& filename, - const std::vector<unsigned char>& b){ - return std::unique_ptr<Timezone>(new TimezoneImpl(filename, b)); - } - - TimezoneImpl::~TimezoneImpl() { - // PASS - } - - void TimezoneImpl::parseTimeVariants(const unsigned char* ptr, - uint64_t variantOffset, - uint64_t variantCount, - uint64_t nameOffset, - uint64_t nameCount) { - for(uint64_t variant=0; variant < variantCount; ++variant) { - variants[variant].gmtOffset = - static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant)); - variants[variant].isDst = ptr[variantOffset + 6 * variant + 4] != 0; - uint64_t nameStart = ptr[variantOffset + 6 * variant + 5]; - if (nameStart >= nameCount) { - std::stringstream buffer; - buffer << "name out of range in variant " << variant - << " - " << nameStart << " >= " << nameCount; - throw TimezoneError(buffer.str()); - } - variants[variant].name = std::string(reinterpret_cast<const char*>(ptr) - + nameOffset + nameStart); - } - } - - /** - * Parse the zone file to get the bits we need. 
- * There are two versions of the timezone file: - * - * Version 1(version = 0x00): - * Magic(version) - * Header - * TransitionTimes(4 byte) - * TransitionRules - * Rules - * LeapSeconds(4 byte) - * IsStd - * IsGmt - * - * Version2: - * Version1(0x32) = a version 1 copy of the data for old clients - * Magic(0x32) - * Header - * TransitionTimes(8 byte) - * TransitionRules - * Rules - * LeapSeconds(8 byte) - * IsStd - * IsGmt - * FutureString - */ - void TimezoneImpl::parseZoneFile(const unsigned char *ptr, - uint64_t sectionOffset, - uint64_t fileLength, - const VersionParser& versionParser) { - const uint64_t magicOffset = sectionOffset + 0; - const uint64_t headerOffset = magicOffset + 20; - - // check for validity before we start parsing - if (fileLength < headerOffset + 6 * 4 || - strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", 4) - != 0) { - std::stringstream buffer; - buffer << "non-tzfile " << filename; - throw TimezoneError(buffer.str()); - } - - const uint64_t isGmtCount = decode32(ptr + headerOffset + 0); - const uint64_t isStdCount = decode32(ptr + headerOffset + 4); - const uint64_t leapCount = decode32(ptr + headerOffset + 8); - const uint64_t timeCount = decode32(ptr + headerOffset + 12); - const uint64_t variantCount = decode32(ptr + headerOffset + 16); - const uint64_t nameCount = decode32(ptr + headerOffset + 20); - - const uint64_t timeOffset = headerOffset + 24; - const uint64_t timeVariantOffset = - timeOffset + versionParser.getTimeSize() * timeCount; - const uint64_t variantOffset = timeVariantOffset + timeCount; - const uint64_t nameOffset = variantOffset + variantCount * 6; - const uint64_t sectionLength = nameOffset + nameCount - + (versionParser.getTimeSize() + 4) * leapCount - + isGmtCount + isStdCount; - - if (sectionLength > fileLength) { - std::stringstream buffer; - buffer << "tzfile too short " << filename - << " needs " << sectionLength << " and has " << fileLength; - throw TimezoneError(buffer.str()); - } - - // if it is version 2, skip over the old layout and read the new one. 
- if (sectionOffset == 0 && ptr[magicOffset + 4] != 0) { - parseZoneFile(ptr, sectionLength, fileLength, Version2Parser()); - return; - } - version = versionParser.getVersion(); - variants.resize(variantCount); - transitions.resize(timeCount); - currentVariant.resize(timeCount); - parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, - nameCount); - bool foundAncient = false; - for(uint64_t t=0; t < timeCount; ++t) { - transitions[t] = - versionParser.parseTime(ptr + timeOffset + - t * versionParser.getTimeSize()); - currentVariant[t] = ptr[timeVariantOffset + t]; - if (currentVariant[t] >= variantCount) { - std::stringstream buffer; - buffer << "tzfile rule out of range " << filename - << " references rule " << currentVariant[t] - << " of " << variantCount; - throw TimezoneError(buffer.str()); - } - // find the oldest standard time and use that as the ancient value - if (!foundAncient && - !variants[currentVariant[t]].isDst) { - foundAncient = true; - ancientVariant = currentVariant[t]; - } - } - if (!foundAncient) { - ancientVariant = 0; - } - futureRule = parseFutureRule(versionParser.parseFutureString - (ptr, sectionLength, - fileLength - sectionLength)); - - // find the lower bound for applying the future rule - if (futureRule->isDefined()) { - if (timeCount > 0) { - lastTransition = transitions[timeCount - 1]; - } else { - lastTransition = INT64_MIN; - } - } else { - lastTransition = INT64_MAX; - } - } - - const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const { - // if it is after the last explicit entry in the table, - // use the future rule to get an answer - if (clk > lastTransition) { - return futureRule->getVariant(clk); - } else { - int64_t transition = binarySearch(transitions, clk); - uint64_t idx; - if (transition < 0) { - idx = ancientVariant; - } else { - idx = currentVariant[static_cast<size_t>(transition)]; - } - return variants[idx]; - } - } - - void TimezoneImpl::print(std::ostream& out) const { - out << "Timezone file: " << filename << "\n"; - out << " Version: " << version << "\n"; - futureRule->print(out); - for(uint64_t r=0; r < variants.size(); ++r) { - out << " Variant " << r << ": " - << variants[r].toString() << "\n"; - } - for(uint64_t t=0; t < transitions.size(); ++t) { - tm timeStruct; - tm* result = nullptr; - char buffer[25]; - if (sizeof(time_t) >= 8) { - time_t val = transitions[t]; - result = gmtime_r(&val, &timeStruct); - if (result) { - strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct); - } - } - std::cout << " Transition: " << (result == nullptr ? "null" : buffer) - << " (" << transitions[t] << ") -> " - << variants[currentVariant[t]].name - << "\n"; - } - } - - TimezoneError::TimezoneError(const std::string& what - ): std::runtime_error(what) { - // PASS - } - - TimezoneError::TimezoneError(const TimezoneError& other - ): std::runtime_error(other) { - // PASS - } - - TimezoneError::~TimezoneError() ORC_NOEXCEPT { - // PASS - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/OrcFile.hh" +#include "Timezone.hh" + +#include <errno.h> +#include <map> +#include <sstream> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +namespace orc { + + // default location of the timezone files + static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo"; + + // location of a symlink to the local timezone + static const char LOCAL_TIMEZONE[] = "/etc/localtime"; + + enum TransitionKind { + TRANSITION_JULIAN, + TRANSITION_DAY, + TRANSITION_MONTH + }; + + static const int64_t MONTHS_PER_YEAR = 12; + /** + * The number of days in each month in non-leap and leap years. + */ + static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] = + {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + static const int64_t DAYS_PER_WEEK = 7; + + // Leap years and day of the week repeat every 400 years, which makes it + // a good cycle length. + static const int64_t SECONDS_PER_400_YEARS = + SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3)); + + /** + * Is the given year a leap year? + */ + bool isLeap(int64_t year) { + return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); + } + + /** + * Find the position that is the closest and less than or equal to the + * target. + * @return -1 if the target < array[0] or array is empty or + * i if array[i] <= target and (i == n or array[i] < array[i+1]) + */ + int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) { + uint64_t size = array.size(); + if (size == 0) { + return -1; + } + uint64_t min = 0; + uint64_t max = size - 1; + uint64_t mid = (min + max) / 2; + while ((array[mid] != target) && (min < max)) { + if (array[mid] < target) { + min = mid + 1; + } else if (mid == 0) { + max = 0; + } else { + max = mid - 1; + } + mid = (min + max) / 2; + } + if (target < array[mid]) { + return static_cast<int64_t>(mid) - 1; + } else { + return static_cast<int64_t>(mid); + } + } + + struct Transition { + TransitionKind kind; + int64_t day; + int64_t week; + int64_t month; + int64_t time; + + std::string toString() const { + std::stringstream buffer; + switch (kind) { + case TRANSITION_JULIAN: + buffer << "julian " << day; + break; + case TRANSITION_DAY: + buffer << "day " << day; + break; + case TRANSITION_MONTH: + buffer << "month " << month << " week " << week << " day " << day; + break; + } + buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60) + << ":" << (time % 60); + return buffer.str(); + } + + /** + * Get the transition time for the given year. + * @param year the year + * @return the number of seconds past local Jan 1 00:00:00 that the + * transition happens. + */ + int64_t getTime(int64_t year) const { + int64_t result = time; + switch (kind) { + case TRANSITION_JULIAN: + result += SECONDS_PER_DAY * day; + if (day > 60 && isLeap(year)) { + result += SECONDS_PER_DAY; + } + break; + case TRANSITION_DAY: + result += SECONDS_PER_DAY * day; + break; + case TRANSITION_MONTH: { + bool inLeap = isLeap(year); + int64_t adjustedMonth = (month + 9) % 12 + 1; + int64_t adjustedYear = (month <= 2) ? 
(year - 1) : year; + int64_t adjustedCentury = adjustedYear / 100; + int64_t adjustedRemainder = adjustedYear % 100; + + // day of the week of the first day of month + int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 + + 1 + adjustedRemainder + adjustedRemainder / 4 + + adjustedCentury / 4 - 2 * adjustedCentury) % 7; + if (dayOfWeek < 0) { + dayOfWeek += DAYS_PER_WEEK; + } + + int64_t d = day - dayOfWeek; + if (d < 0) { + d += DAYS_PER_WEEK; + } + for (int w = 1; w < week; ++w) { + if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) { + break; + } + d += DAYS_PER_WEEK; + } + result += d * SECONDS_PER_DAY; + + // Add in the time for the month + for(int m=0; m < month - 1; ++m) { + result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY; + } + break; + } + } + return result; + } + }; + + /** + * The current rule for finding timezone variants arbitrarily far in + * the future. They are based on a string representation that + * specifies the standard name and offset. For timezones with + * daylight savings, the string specifies the daylight variant name + * and offset and the rules for switching between them. + * + * rule = <standard name><standard offset><daylight>? + * name = string with no numbers or '+', '-', or ',' + * offset = [-+]?hh(:mm(:ss)?)? + * daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)? + * day = J<day without 2/29>|<day with 2/29>|M<month>.<week>.<day of week> + */ + class FutureRuleImpl: public FutureRule { + std::string ruleString; + TimezoneVariant standard; + bool hasDst; + TimezoneVariant dst; + Transition start; + Transition end; + + // expanded time_t offsets of transitions + std::vector<int64_t> offsets; + + // Is the epoch (1 Jan 1970 00:00) in standard time? + // This code assumes that the transition dates fall in the same order + // each year. Hopefully no timezone regions decide to move across the + // equator, which is about what it would take. + bool startInStd; + + void computeOffsets() { + if (!hasDst) { + startInStd = true; + offsets.resize(1); + } else { + // Insert a transition for the epoch and two per a year for the next + // 400 years. We assume that the all even positions are in standard + // time if and only if startInStd and the odd ones are the reverse. + offsets.resize(400 * 2 + 1); + startInStd = start.getTime(1970) < end.getTime(1970); + int64_t base = 0; + for(int64_t year = 1970; year < 1970 + 400; ++year) { + if (startInStd) { + offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = + base + start.getTime(year) - standard.gmtOffset; + offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = + base + end.getTime(year) - dst.gmtOffset; + } else { + offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = + base + end.getTime(year) - dst.gmtOffset; + offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = + base + start.getTime(year) - standard.gmtOffset; + } + base += (isLeap(year) ? 
366 : 365) * SECONDS_PER_DAY; + } + } + offsets[0] = 0; + } + + public: + virtual ~FutureRuleImpl() override; + bool isDefined() const override; + const TimezoneVariant& getVariant(int64_t clk) const override; + void print(std::ostream& out) const override; + + friend class FutureRuleParser; + }; + + FutureRule::~FutureRule() { + // PASS + } + + FutureRuleImpl::~FutureRuleImpl() { + // PASS + } + + bool FutureRuleImpl::isDefined() const { + return ruleString.size() > 0; + } + + const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const { + if (!hasDst) { + return standard; + } else { + int64_t adjusted = clk % SECONDS_PER_400_YEARS; + if (adjusted < 0) { + adjusted += SECONDS_PER_400_YEARS; + } + int64_t idx = binarySearch(offsets, adjusted); + if (startInStd == (idx % 2 == 0)) { + return standard; + } else { + return dst; + } + } + } + + void FutureRuleImpl::print(std::ostream& out) const { + if (isDefined()) { + out << " Future rule: " << ruleString << "\n"; + out << " standard " << standard.toString() << "\n"; + if (hasDst) { + out << " dst " << dst.toString() << "\n"; + out << " start " << start.toString() << "\n"; + out << " end " << end.toString() << "\n"; + } + } + } + + /** + * A parser for the future rule strings. + */ + class FutureRuleParser { + public: + FutureRuleParser(const std::string& str, + FutureRuleImpl* rule + ): ruleString(str), + length(str.size()), + position(0), + output(*rule) { + output.ruleString = str; + if (position != length) { + parseName(output.standard.name); + output.standard.gmtOffset = -parseOffset(); + output.standard.isDst = false; + output.hasDst = position < length; + if (output.hasDst) { + parseName(output.dst.name); + output.dst.isDst = true; + if (ruleString[position] != ',') { + output.dst.gmtOffset = -parseOffset(); + } else { + output.dst.gmtOffset = output.standard.gmtOffset + 60 * 60; + } + parseTransition(output.start); + parseTransition(output.end); + } + if (position != length) { + throwError("Extra text"); + } + output.computeOffsets(); + } + } + + private: + + const std::string& ruleString; + size_t length; + size_t position; + FutureRuleImpl &output; + + void throwError(const char *msg) { + std::stringstream buffer; + buffer << msg << " at " << position << " in '" << ruleString << "'"; + throw TimezoneError(buffer.str()); + } + + /** + * Parse the names of the form: + * ([^-+0-9,]+|<[^>]+>) + * and set the output string. + */ + void parseName(std::string& result) { + if (position == length) { + throwError("name required"); + } + size_t start = position; + if (ruleString[position] == '<') { + while (position < length && ruleString[position] != '>') { + position += 1; + } + if (position == length) { + throwError("missing close '>'"); + } + position +=1; + } else { + while (position < length) { + char ch = ruleString[position]; + if (isdigit(ch) || ch == '-' || ch == '+' || ch == ',') { + break; + } + position += 1; + } + } + if (position == start) { + throwError("empty string not allowed"); + } + result = ruleString.substr(start, position - start); + } + + /** + * Parse an integer of the form [0-9]+ and return it. + */ + int64_t parseNumber() { + if (position >= length) { + throwError("missing number"); + } + int64_t result = 0; + while (position < length) { + char ch = ruleString[position]; + if (isdigit(ch)) { + result = result * 10 + (ch - '0'); + position += 1; + } else { + break; + } + } + return result; + } + + /** + * Parse the offsets of the form: + * [-+]?[0-9]+(:[0-9]+(:[0-9]+)?)? 
+ * and convert it into a number of seconds. + */ + int64_t parseOffset() { + int64_t scale = 3600; + bool isNegative = false; + if (position < length) { + char ch = ruleString[position]; + isNegative = ch == '-'; + if (ch == '-' || ch == '+') { + position += 1; + } + } + int64_t result = parseNumber() * scale; + while (position < length && scale > 1 && ruleString[position] == ':') { + scale /= 60; + position += 1; + result += parseNumber() * scale; + } + if (isNegative) { + result = -result; + } + return result; + } + + /** + * Parse a transition of the following form: + * ,(J<number>|<number>|M<number>.<number>.<number>)(/<offset>)? + */ + void parseTransition(Transition& transition) { + if (length - position < 2 || ruleString[position] != ',') { + throwError("missing transition"); + } + position += 1; + char ch = ruleString[position]; + if (ch == 'J') { + transition.kind = TRANSITION_JULIAN; + position += 1; + transition.day = parseNumber(); + } else if (ch == 'M') { + transition.kind = TRANSITION_MONTH; + position += 1; + transition.month = parseNumber(); + if (position == length || ruleString[position] != '.') { + throwError("missing first ."); + } + position += 1; + transition.week = parseNumber(); + if (position == length || ruleString[position] != '.') { + throwError("missing second ."); + } + position += 1; + transition.day = parseNumber(); + } else { + transition.kind = TRANSITION_DAY; + transition.day = parseNumber(); + } + if (position < length && ruleString[position] == '/') { + position += 1; + transition.time = parseOffset(); + } else { + transition.time = 2 * 60 * 60; + } + } + }; + + /** + * Parse the POSIX TZ string. + */ + std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) { + std::shared_ptr<FutureRule> result(new FutureRuleImpl()); + FutureRuleParser parser(ruleString, + dynamic_cast<FutureRuleImpl*>(result.get())); + return result; + } + + std::string TimezoneVariant::toString() const { + std::stringstream buffer; + buffer << name << " " << gmtOffset; + if (isDst) { + buffer << " (dst)"; + } + return buffer.str(); + } + + /** + * An abstraction of the differences between versions. + */ + class VersionParser { + public: + virtual ~VersionParser(); + + /** + * Get the version number. + */ + virtual uint64_t getVersion() const = 0; + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const = 0; + + /** + * Parse the time at the given location. + */ + virtual int64_t parseTime(const unsigned char* ptr) const = 0; + + /** + * Parse the future string + */ + virtual std::string parseFutureString(const unsigned char *ptr, + uint64_t offset, + uint64_t length) const = 0; + }; + + VersionParser::~VersionParser() { + // PASS + } + + static uint32_t decode32(const unsigned char* ptr) { + return static_cast<uint32_t>(ptr[0] << 24) | + static_cast<uint32_t>(ptr[1] << 16) | + static_cast<uint32_t>(ptr[2] << 8) | + static_cast<uint32_t>(ptr[3]); + } + + class Version1Parser: public VersionParser { + public: + virtual ~Version1Parser() override; + + virtual uint64_t getVersion() const override { + return 1; + } + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const override { + return 4; + } + + /** + * Parse the time at the given location. 
+ */ + virtual int64_t parseTime(const unsigned char* ptr) const override { + // sign extend from 32 bits + return static_cast<int32_t>(decode32(ptr)); + } + + virtual std::string parseFutureString(const unsigned char *, + uint64_t, + uint64_t) const override { + return ""; + } + }; + + Version1Parser::~Version1Parser() { + // PASS + } + + class Version2Parser: public VersionParser { + public: + virtual ~Version2Parser() override; + + virtual uint64_t getVersion() const override { + return 2; + } + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const override { + return 8; + } + + /** + * Parse the time at the given location. + */ + virtual int64_t parseTime(const unsigned char* ptr) const override { + return static_cast<int64_t>(decode32(ptr)) << 32 | decode32(ptr + 4); + } + + virtual std::string parseFutureString(const unsigned char *ptr, + uint64_t offset, + uint64_t length) const override { + return std::string(reinterpret_cast<const char*>(ptr) + offset + 1, + length - 2); + } + }; + + Version2Parser::~Version2Parser() { + // PASS + } + + class TimezoneImpl: public Timezone { + public: + TimezoneImpl(const std::string& name, + const std::vector<unsigned char> bytes); + virtual ~TimezoneImpl() override; + + /** + * Get the variant for the given time (time_t). + */ + const TimezoneVariant& getVariant(int64_t clk) const override; + + void print(std::ostream&) const override; + + uint64_t getVersion() const override { + return version; + } + + int64_t getEpoch() const override { + return epoch; + } + + int64_t convertToUTC(int64_t clk) const override { + return clk + getVariant(clk).gmtOffset; + } + + private: + void parseTimeVariants(const unsigned char* ptr, + uint64_t variantOffset, + uint64_t variantCount, + uint64_t nameOffset, + uint64_t nameCount); + void parseZoneFile(const unsigned char* ptr, + uint64_t sectionOffset, + uint64_t fileLength, + const VersionParser& version); + // filename + std::string filename; + + // the version of the file + uint64_t version; + + // the list of variants for this timezone + std::vector<TimezoneVariant> variants; + + // the list of the times where the local rules change + std::vector<int64_t> transitions; + + // the variant that starts at this transition. + std::vector<uint64_t> currentVariant; + + // the variant before the first transition + uint64_t ancientVariant; + + // the rule for future times + std::shared_ptr<FutureRule> futureRule; + + // the last explicit transition after which we use the future rule + int64_t lastTransition; + + // The ORC epoch time in this timezone. 
+ int64_t epoch; + }; + + DIAGNOSTIC_PUSH + #ifdef __clang__ + DIAGNOSTIC_IGNORE("-Wglobal-constructors") + DIAGNOSTIC_IGNORE("-Wexit-time-destructors") + #endif + static std::mutex timezone_mutex; + static std::map<std::string, std::shared_ptr<Timezone> > timezoneCache; + DIAGNOSTIC_POP + + Timezone::~Timezone() { + // PASS + } + + TimezoneImpl::TimezoneImpl(const std::string& _filename, + const std::vector<unsigned char> buffer + ): filename(_filename) { + parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser()); + // Build the literal for the ORC epoch + // 2015 Jan 1 00:00:00 + tm epochStruct; + epochStruct.tm_sec = 0; + epochStruct.tm_min = 0; + epochStruct.tm_hour = 0; + epochStruct.tm_mday = 1; + epochStruct.tm_mon = 0; + epochStruct.tm_year = 2015 - 1900; + epochStruct.tm_isdst = 0; + time_t utcEpoch = timegm(&epochStruct); + epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; + } + + const char* getTimezoneDirectory() { + const char *dir = getenv("TZDIR"); + if (!dir) { + dir = DEFAULT_TZDIR; + } + return dir; + } + + /** + * Get a timezone by absolute filename. + * Results are cached. + */ + const Timezone& getTimezoneByFilename(const std::string& filename) { + // ORC-110 + std::lock_guard<std::mutex> timezone_lock(timezone_mutex); + std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = + timezoneCache.find(filename); + if (itr != timezoneCache.end()) { + return *(itr->second).get(); + } + try { + ORC_UNIQUE_PTR<InputStream> file = readFile(filename); + size_t size = static_cast<size_t>(file->getLength()); + std::vector<unsigned char> buffer(size); + file->read(&buffer[0], size, 0); + timezoneCache[filename] = std::shared_ptr<Timezone>(new TimezoneImpl(filename, buffer)); + } catch(ParseError& err) { + throw TimezoneError(err.what()); + } + return *timezoneCache[filename].get(); + } + + /** + * Get the local timezone. + */ + const Timezone& getLocalTimezone() { +#ifdef _MSC_VER + return getTimezoneByName("UTC"); +#else + return getTimezoneByFilename(LOCAL_TIMEZONE); +#endif + } + + /** + * Get a timezone by name (eg. America/Los_Angeles). + * Results are cached. + */ + const Timezone& getTimezoneByName(const std::string& zone) { + std::string filename(getTimezoneDirectory()); + filename += "/"; + filename += zone; + return getTimezoneByFilename(filename); + } + + /** + * Parse a set of bytes as a timezone file as if they came from filename. + */ + std::unique_ptr<Timezone> getTimezone(const std::string& filename, + const std::vector<unsigned char>& b){ + return std::unique_ptr<Timezone>(new TimezoneImpl(filename, b)); + } + + TimezoneImpl::~TimezoneImpl() { + // PASS + } + + void TimezoneImpl::parseTimeVariants(const unsigned char* ptr, + uint64_t variantOffset, + uint64_t variantCount, + uint64_t nameOffset, + uint64_t nameCount) { + for(uint64_t variant=0; variant < variantCount; ++variant) { + variants[variant].gmtOffset = + static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant)); + variants[variant].isDst = ptr[variantOffset + 6 * variant + 4] != 0; + uint64_t nameStart = ptr[variantOffset + 6 * variant + 5]; + if (nameStart >= nameCount) { + std::stringstream buffer; + buffer << "name out of range in variant " << variant + << " - " << nameStart << " >= " << nameCount; + throw TimezoneError(buffer.str()); + } + variants[variant].name = std::string(reinterpret_cast<const char*>(ptr) + + nameOffset + nameStart); + } + } + + /** + * Parse the zone file to get the bits we need. 
+ * There are two versions of the timezone file: + * + * Version 1(version = 0x00): + * Magic(version) + * Header + * TransitionTimes(4 byte) + * TransitionRules + * Rules + * LeapSeconds(4 byte) + * IsStd + * IsGmt + * + * Version2: + * Version1(0x32) = a version 1 copy of the data for old clients + * Magic(0x32) + * Header + * TransitionTimes(8 byte) + * TransitionRules + * Rules + * LeapSeconds(8 byte) + * IsStd + * IsGmt + * FutureString + */ + void TimezoneImpl::parseZoneFile(const unsigned char *ptr, + uint64_t sectionOffset, + uint64_t fileLength, + const VersionParser& versionParser) { + const uint64_t magicOffset = sectionOffset + 0; + const uint64_t headerOffset = magicOffset + 20; + + // check for validity before we start parsing + if (fileLength < headerOffset + 6 * 4 || + strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", 4) + != 0) { + std::stringstream buffer; + buffer << "non-tzfile " << filename; + throw TimezoneError(buffer.str()); + } + + const uint64_t isGmtCount = decode32(ptr + headerOffset + 0); + const uint64_t isStdCount = decode32(ptr + headerOffset + 4); + const uint64_t leapCount = decode32(ptr + headerOffset + 8); + const uint64_t timeCount = decode32(ptr + headerOffset + 12); + const uint64_t variantCount = decode32(ptr + headerOffset + 16); + const uint64_t nameCount = decode32(ptr + headerOffset + 20); + + const uint64_t timeOffset = headerOffset + 24; + const uint64_t timeVariantOffset = + timeOffset + versionParser.getTimeSize() * timeCount; + const uint64_t variantOffset = timeVariantOffset + timeCount; + const uint64_t nameOffset = variantOffset + variantCount * 6; + const uint64_t sectionLength = nameOffset + nameCount + + (versionParser.getTimeSize() + 4) * leapCount + + isGmtCount + isStdCount; + + if (sectionLength > fileLength) { + std::stringstream buffer; + buffer << "tzfile too short " << filename + << " needs " << sectionLength << " and has " << fileLength; + throw TimezoneError(buffer.str()); + } + + // if it is version 2, skip over the old layout and read the new one. 
+ if (sectionOffset == 0 && ptr[magicOffset + 4] != 0) { + parseZoneFile(ptr, sectionLength, fileLength, Version2Parser()); + return; + } + version = versionParser.getVersion(); + variants.resize(variantCount); + transitions.resize(timeCount); + currentVariant.resize(timeCount); + parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, + nameCount); + bool foundAncient = false; + for(uint64_t t=0; t < timeCount; ++t) { + transitions[t] = + versionParser.parseTime(ptr + timeOffset + + t * versionParser.getTimeSize()); + currentVariant[t] = ptr[timeVariantOffset + t]; + if (currentVariant[t] >= variantCount) { + std::stringstream buffer; + buffer << "tzfile rule out of range " << filename + << " references rule " << currentVariant[t] + << " of " << variantCount; + throw TimezoneError(buffer.str()); + } + // find the oldest standard time and use that as the ancient value + if (!foundAncient && + !variants[currentVariant[t]].isDst) { + foundAncient = true; + ancientVariant = currentVariant[t]; + } + } + if (!foundAncient) { + ancientVariant = 0; + } + futureRule = parseFutureRule(versionParser.parseFutureString + (ptr, sectionLength, + fileLength - sectionLength)); + + // find the lower bound for applying the future rule + if (futureRule->isDefined()) { + if (timeCount > 0) { + lastTransition = transitions[timeCount - 1]; + } else { + lastTransition = INT64_MIN; + } + } else { + lastTransition = INT64_MAX; + } + } + + const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const { + // if it is after the last explicit entry in the table, + // use the future rule to get an answer + if (clk > lastTransition) { + return futureRule->getVariant(clk); + } else { + int64_t transition = binarySearch(transitions, clk); + uint64_t idx; + if (transition < 0) { + idx = ancientVariant; + } else { + idx = currentVariant[static_cast<size_t>(transition)]; + } + return variants[idx]; + } + } + + void TimezoneImpl::print(std::ostream& out) const { + out << "Timezone file: " << filename << "\n"; + out << " Version: " << version << "\n"; + futureRule->print(out); + for(uint64_t r=0; r < variants.size(); ++r) { + out << " Variant " << r << ": " + << variants[r].toString() << "\n"; + } + for(uint64_t t=0; t < transitions.size(); ++t) { + tm timeStruct; + tm* result = nullptr; + char buffer[25]; + if (sizeof(time_t) >= 8) { + time_t val = transitions[t]; + result = gmtime_r(&val, &timeStruct); + if (result) { + strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct); + } + } + std::cout << " Transition: " << (result == nullptr ? "null" : buffer) + << " (" << transitions[t] << ") -> " + << variants[currentVariant[t]].name + << "\n"; + } + } + + TimezoneError::TimezoneError(const std::string& what + ): std::runtime_error(what) { + // PASS + } + + TimezoneError::TimezoneError(const TimezoneError& other + ): std::runtime_error(other) { + // PASS + } + + TimezoneError::~TimezoneError() ORC_NOEXCEPT { + // PASS + } + +} diff --git a/contrib/libs/apache/orc/c++/src/Timezone.hh b/contrib/libs/apache/orc/c++/src/Timezone.hh index 6bcb6586d0..136b7a18b7 100644 --- a/contrib/libs/apache/orc/c++/src/Timezone.hh +++ b/contrib/libs/apache/orc/c++/src/Timezone.hh @@ -1,130 +1,130 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TIMEZONE_HH -#define TIMEZONE_HH - -// This file is for timezone routines. - -#include "Adaptor.hh" - -#include <memory> -#include <stdexcept> -#include <stdint.h> -#include <string> -#include <vector> - -namespace orc { - - static const int64_t SECONDS_PER_HOUR = 60 * 60; - static const int64_t SECONDS_PER_DAY = SECONDS_PER_HOUR * 24; - - /** - * A variant (eg. PST or PDT) of a timezone (eg. America/Los_Angeles). - */ - struct TimezoneVariant { - int64_t gmtOffset; - bool isDst; - std::string name; - - std::string toString() const; - }; - - /** - * A region that shares the same legal rules for wall clock time and - * day light savings transitions. They are typically named for the largest - * city in the region (eg. America/Los_Angeles or America/Mexico_City). - */ - class Timezone { - public: - virtual ~Timezone(); - - /** - * Get the variant for the given time (time_t). - */ - virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; - - /** - * Get the number of seconds between the ORC epoch in this timezone - * and Unix epoch. - * ORC epoch is 1 Jan 2015 00:00:00 local. - * Unix epoch is 1 Jan 1970 00:00:00 UTC. - */ - virtual int64_t getEpoch() const = 0; - - /** - * Print the timezone to the stream. - */ - virtual void print(std::ostream&) const = 0; - - /** - * Get the version of the zone file. - */ - virtual uint64_t getVersion() const =0; - - /** - * Convert wall clock time of current timezone to UTC timezone - */ - virtual int64_t convertToUTC(int64_t clk) const = 0; - }; - - /** - * Get the local timezone. - * Results are cached. - */ - const Timezone& getLocalTimezone(); - - /** - * Get a timezone by name (eg. America/Los_Angeles). - * Results are cached. - */ - const Timezone& getTimezoneByName(const std::string& zone); - - /** - * Parse a set of bytes as a timezone file as if they came from filename. - */ - std::unique_ptr<Timezone> getTimezone(const std::string& filename, - const std::vector<unsigned char>& b); - - class TimezoneError: public std::runtime_error { - public: - TimezoneError(const std::string& what); - TimezoneError(const TimezoneError&); - virtual ~TimezoneError() ORC_NOEXCEPT; - }; - - /** - * Represents the parsed POSIX timezone rule strings that are used to - * describe the future transitions, because they can go arbitrarily far into - * the future. - */ - class FutureRule { - public: - virtual ~FutureRule(); - virtual bool isDefined() const = 0; - virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; - virtual void print(std::ostream& out) const = 0; - }; - - /** - * Parse the POSIX TZ string. - */ - std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TIMEZONE_HH +#define TIMEZONE_HH + +// This file is for timezone routines. + +#include "Adaptor.hh" + +#include <memory> +#include <stdexcept> +#include <stdint.h> +#include <string> +#include <vector> + +namespace orc { + + static const int64_t SECONDS_PER_HOUR = 60 * 60; + static const int64_t SECONDS_PER_DAY = SECONDS_PER_HOUR * 24; + + /** + * A variant (eg. PST or PDT) of a timezone (eg. America/Los_Angeles). + */ + struct TimezoneVariant { + int64_t gmtOffset; + bool isDst; + std::string name; + + std::string toString() const; + }; + + /** + * A region that shares the same legal rules for wall clock time and + * day light savings transitions. They are typically named for the largest + * city in the region (eg. America/Los_Angeles or America/Mexico_City). + */ + class Timezone { + public: + virtual ~Timezone(); + + /** + * Get the variant for the given time (time_t). + */ + virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; + + /** + * Get the number of seconds between the ORC epoch in this timezone + * and Unix epoch. + * ORC epoch is 1 Jan 2015 00:00:00 local. + * Unix epoch is 1 Jan 1970 00:00:00 UTC. + */ + virtual int64_t getEpoch() const = 0; + + /** + * Print the timezone to the stream. + */ + virtual void print(std::ostream&) const = 0; + + /** + * Get the version of the zone file. + */ + virtual uint64_t getVersion() const =0; + + /** + * Convert wall clock time of current timezone to UTC timezone + */ + virtual int64_t convertToUTC(int64_t clk) const = 0; + }; + + /** + * Get the local timezone. + * Results are cached. + */ + const Timezone& getLocalTimezone(); + + /** + * Get a timezone by name (eg. America/Los_Angeles). + * Results are cached. + */ + const Timezone& getTimezoneByName(const std::string& zone); + + /** + * Parse a set of bytes as a timezone file as if they came from filename. + */ + std::unique_ptr<Timezone> getTimezone(const std::string& filename, + const std::vector<unsigned char>& b); + + class TimezoneError: public std::runtime_error { + public: + TimezoneError(const std::string& what); + TimezoneError(const TimezoneError&); + virtual ~TimezoneError() ORC_NOEXCEPT; + }; + + /** + * Represents the parsed POSIX timezone rule strings that are used to + * describe the future transitions, because they can go arbitrarily far into + * the future. + */ + class FutureRule { + public: + virtual ~FutureRule(); + virtual bool isDefined() const = 0; + virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; + virtual void print(std::ostream& out) const = 0; + }; + + /** + * Parse the POSIX TZ string. 
+ */ + std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/TypeImpl.cc b/contrib/libs/apache/orc/c++/src/TypeImpl.cc index 78a0e00686..c154f2af04 100644 --- a/contrib/libs/apache/orc/c++/src/TypeImpl.cc +++ b/contrib/libs/apache/orc/c++/src/TypeImpl.cc @@ -1,707 +1,707 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Adaptor.hh" -#include "orc/Exceptions.hh" -#include "TypeImpl.hh" - -#include <iostream> -#include <sstream> - -namespace orc { - - Type::~Type() { - // PASS - } - - TypeImpl::TypeImpl(TypeKind _kind) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = 0; - precision = 0; - scale = 0; - subtypeCount = 0; - } - - TypeImpl::TypeImpl(TypeKind _kind, uint64_t _maxLength) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = _maxLength; - precision = 0; - scale = 0; - subtypeCount = 0; - } - - TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision, - uint64_t _scale) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = 0; - precision = _precision; - scale = _scale; - subtypeCount = 0; - } - - uint64_t TypeImpl::assignIds(uint64_t root) const { - columnId = static_cast<int64_t>(root); - uint64_t current = root + 1; - for(uint64_t i=0; i < subtypeCount; ++i) { - current = dynamic_cast<TypeImpl*>(subTypes[i])->assignIds(current); - } - maximumColumnId = static_cast<int64_t>(current) - 1; - return current; - } - - TypeImpl::~TypeImpl() { - for (std::vector<Type*>::iterator it = subTypes.begin(); - it != subTypes.end(); it++) { - delete (*it) ; - } - } - - void TypeImpl::ensureIdAssigned() const { - if (columnId == -1) { - const TypeImpl* root = this; - while (root->parent != nullptr) { - root = root->parent; - } - root->assignIds(0); - } - } - - uint64_t TypeImpl::getColumnId() const { - ensureIdAssigned(); - return static_cast<uint64_t>(columnId); - } - - uint64_t TypeImpl::getMaximumColumnId() const { - ensureIdAssigned(); - return static_cast<uint64_t>(maximumColumnId); - } - - TypeKind TypeImpl::getKind() const { - return kind; - } - - uint64_t TypeImpl::getSubtypeCount() const { - return subtypeCount; - } - - const Type* TypeImpl::getSubtype(uint64_t i) const { - return subTypes[i]; - } - - const std::string& TypeImpl::getFieldName(uint64_t i) const { - return fieldNames[i]; - } - - uint64_t TypeImpl::getMaximumLength() const { - return maxLength; - } - - uint64_t TypeImpl::getPrecision() const { - return precision; - } - - uint64_t TypeImpl::getScale() const { - return scale; - } - - void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) { - columnId = static_cast<int64_t>(_columnId); - maximumColumnId 
= static_cast<int64_t>(_maxColumnId); - } - - void TypeImpl::addChildType(std::unique_ptr<Type> childType) { - TypeImpl* child = dynamic_cast<TypeImpl*>(childType.release()); - subTypes.push_back(child); - if (child != nullptr) { - child->parent = this; - } - subtypeCount += 1; - } - - Type* TypeImpl::addStructField(const std::string& fieldName, - std::unique_ptr<Type> fieldType) { - addChildType(std::move(fieldType)); - fieldNames.push_back(fieldName); - return this; - } - - Type* TypeImpl::addUnionChild(std::unique_ptr<Type> fieldType) { - addChildType(std::move(fieldType)); - return this; - } - - std::string TypeImpl::toString() const { - switch (static_cast<int64_t>(kind)) { - case BOOLEAN: - return "boolean"; - case BYTE: - return "tinyint"; - case SHORT: - return "smallint"; - case INT: - return "int"; - case LONG: - return "bigint"; - case FLOAT: - return "float"; - case DOUBLE: - return "double"; - case STRING: - return "string"; - case BINARY: - return "binary"; - case TIMESTAMP: - return "timestamp"; - case LIST: - return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">"; - case MAP: - return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," + - (subTypes[1] ? subTypes[1]->toString() : "void") + ">"; - case STRUCT: { - std::string result = "struct<"; - for(size_t i=0; i < subTypes.size(); ++i) { - if (i != 0) { - result += ","; - } - result += fieldNames[i]; - result += ":"; - result += subTypes[i]->toString(); - } - result += ">"; - return result; - } - case UNION: { - std::string result = "uniontype<"; - for(size_t i=0; i < subTypes.size(); ++i) { - if (i != 0) { - result += ","; - } - result += subTypes[i]->toString(); - } - result += ">"; - return result; - } - case DECIMAL: { - std::stringstream result; - result << "decimal(" << precision << "," << scale << ")"; - return result.str(); - } - case DATE: - return "date"; - case VARCHAR: { - std::stringstream result; - result << "varchar(" << maxLength << ")"; - return result.str(); - } - case CHAR: { - std::stringstream result; - result << "char(" << maxLength << ")"; - return result.str(); - } - default: - throw NotImplementedYet("Unknown type"); - } - } - - std::unique_ptr<ColumnVectorBatch> - TypeImpl::createRowBatch(uint64_t capacity, - MemoryPool& memoryPool, - bool encoded) const { - switch (static_cast<int64_t>(kind)) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - return std::unique_ptr<ColumnVectorBatch> - (new LongVectorBatch(capacity, memoryPool)); - - case FLOAT: - case DOUBLE: - return std::unique_ptr<ColumnVectorBatch> - (new DoubleVectorBatch(capacity, memoryPool)); - - case STRING: - case BINARY: - case CHAR: - case VARCHAR: - return encoded ? 
- std::unique_ptr<ColumnVectorBatch> - (new EncodedStringVectorBatch(capacity, memoryPool)) - : std::unique_ptr<ColumnVectorBatch> - (new StringVectorBatch(capacity, memoryPool)); - - case TIMESTAMP: - return std::unique_ptr<ColumnVectorBatch> - (new TimestampVectorBatch(capacity, memoryPool)); - - case STRUCT: { - StructVectorBatch *result = new StructVectorBatch(capacity, memoryPool); - std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); - for(uint64_t i=0; i < getSubtypeCount(); ++i) { - result->fields.push_back(getSubtype(i)-> - createRowBatch(capacity, - memoryPool, encoded).release()); - } - return return_value; - } - - case LIST: { - ListVectorBatch* result = new ListVectorBatch(capacity, memoryPool); - std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); - if (getSubtype(0) != nullptr) { - result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded); - } - return return_value; - } - - case MAP: { - MapVectorBatch* result = new MapVectorBatch(capacity, memoryPool); - std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); - if (getSubtype(0) != nullptr) { - result->keys = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded); - } - if (getSubtype(1) != nullptr) { - result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool, encoded); - } - return return_value; - } - - case DECIMAL: { - if (getPrecision() == 0 || getPrecision() > 18) { - return std::unique_ptr<ColumnVectorBatch> - (new Decimal128VectorBatch(capacity, memoryPool)); - } else { - return std::unique_ptr<ColumnVectorBatch> - (new Decimal64VectorBatch(capacity, memoryPool)); - } - } - - case UNION: { - UnionVectorBatch *result = new UnionVectorBatch(capacity, memoryPool); - std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); - for(uint64_t i=0; i < getSubtypeCount(); ++i) { - result->children.push_back(getSubtype(i)->createRowBatch(capacity, - memoryPool, encoded) - .release()); - } - return return_value; - } - - default: - throw NotImplementedYet("not supported yet"); - } - } - - std::unique_ptr<Type> createPrimitiveType(TypeKind kind) { - return std::unique_ptr<Type>(new TypeImpl(kind)); - } - - std::unique_ptr<Type> createCharType(TypeKind kind, - uint64_t maxLength) { - return std::unique_ptr<Type>(new TypeImpl(kind, maxLength)); - } - - std::unique_ptr<Type> createDecimalType(uint64_t precision, - uint64_t scale) { - return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale)); - } - - std::unique_ptr<Type> createStructType() { - return std::unique_ptr<Type>(new TypeImpl(STRUCT)); - } - - std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) { - TypeImpl* result = new TypeImpl(LIST); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); - result->addChildType(std::move(elements)); - return return_value; - } - - std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, - std::unique_ptr<Type> value) { - TypeImpl* result = new TypeImpl(MAP); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); - result->addChildType(std::move(key)); - result->addChildType(std::move(value)); - return return_value; - } - - std::unique_ptr<Type> createUnionType() { - return std::unique_ptr<Type>(new TypeImpl(UNION)); - } - - std::string printProtobufMessage(const google::protobuf::Message& message); - std::unique_ptr<Type> convertType(const proto::Type& type, - const proto::Footer& 
footer) { - switch (static_cast<int64_t>(type.kind())) { - - case proto::Type_Kind_BOOLEAN: - case proto::Type_Kind_BYTE: - case proto::Type_Kind_SHORT: - case proto::Type_Kind_INT: - case proto::Type_Kind_LONG: - case proto::Type_Kind_FLOAT: - case proto::Type_Kind_DOUBLE: - case proto::Type_Kind_STRING: - case proto::Type_Kind_BINARY: - case proto::Type_Kind_TIMESTAMP: - case proto::Type_Kind_DATE: - return std::unique_ptr<Type> - (new TypeImpl(static_cast<TypeKind>(type.kind()))); - - case proto::Type_Kind_CHAR: - case proto::Type_Kind_VARCHAR: - return std::unique_ptr<Type> - (new TypeImpl(static_cast<TypeKind>(type.kind()), - type.maximumlength())); - - case proto::Type_Kind_DECIMAL: - return std::unique_ptr<Type> - (new TypeImpl(DECIMAL, type.precision(), type.scale())); - - case proto::Type_Kind_LIST: - case proto::Type_Kind_MAP: - case proto::Type_Kind_UNION: { - TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind())); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); - if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) - throw ParseError("Illegal LIST type that doesn't contain one subtype"); - if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) - throw ParseError("Illegal MAP type that doesn't contain two subtypes"); - if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0) - throw ParseError("Illegal UNION type that doesn't contain any subtypes"); - for(int i=0; i < type.subtypes_size(); ++i) { - result->addUnionChild(convertType(footer.types(static_cast<int> - (type.subtypes(i))), - footer)); - } - return return_value; - } - - case proto::Type_Kind_STRUCT: { - TypeImpl* result = new TypeImpl(STRUCT); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); - for(int i=0; i < type.subtypes_size(); ++i) { - result->addStructField(type.fieldnames(i), - convertType(footer.types(static_cast<int> - (type.subtypes(i))), - footer)); - } - return return_value; - } - default: - throw NotImplementedYet("Unknown type kind"); - } - } - - /** - * Build a clone of the file type, projecting columns from the selected - * vector. This routine assumes that the parent of any selected column - * is also selected. The column ids are copied from the fileType. 
- * @param fileType the type in the file - * @param selected is each column by id selected - * @return a clone of the fileType filtered by the selection array - */ - std::unique_ptr<Type> buildSelectedType(const Type *fileType, - const std::vector<bool>& selected) { - if (fileType == nullptr || !selected[fileType->getColumnId()]) { - return std::unique_ptr<Type>(); - } - - TypeImpl* result; - switch (static_cast<int>(fileType->getKind())) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case STRING: - case BINARY: - case TIMESTAMP: - case DATE: - result = new TypeImpl(fileType->getKind()); - break; - - case DECIMAL: - result= new TypeImpl(fileType->getKind(), - fileType->getPrecision(), fileType->getScale()); - break; - - case VARCHAR: - case CHAR: - result = new TypeImpl(fileType->getKind(), fileType->getMaximumLength()); - break; - - case LIST: - result = new TypeImpl(fileType->getKind()); - result->addChildType(buildSelectedType(fileType->getSubtype(0), - selected)); - break; - - case MAP: - result = new TypeImpl(fileType->getKind()); - result->addChildType(buildSelectedType(fileType->getSubtype(0), - selected)); - result->addChildType(buildSelectedType(fileType->getSubtype(1), - selected)); - break; - - case STRUCT: { - result = new TypeImpl(fileType->getKind()); - for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) { - std::unique_ptr<Type> childType = - buildSelectedType(fileType->getSubtype(child), selected); - if (childType.get() != nullptr) { - result->addStructField(fileType->getFieldName(child), - std::move(childType)); - } - } - break; - } - - case UNION: { - result = new TypeImpl(fileType->getKind()); - for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) { - std::unique_ptr<Type> childType = - buildSelectedType(fileType->getSubtype(child), selected); - if (childType.get() != nullptr) { - result->addUnionChild(std::move(childType)); - } - } - break; - } - - default: - throw NotImplementedYet("Unknown type kind"); - } - result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId()); - return std::unique_ptr<Type>(result); - } - - ORC_UNIQUE_PTR<Type> Type::buildTypeFromString(const std::string& input) { - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > res = - TypeImpl::parseType(input, 0, input.size()); - if (res.size() != 1) { - throw std::logic_error("Invalid type string."); - } - return std::move(res[0].second); - } - - std::unique_ptr<Type> TypeImpl::parseArrayType(const std::string &input, - size_t start, - size_t end) { - TypeImpl* arrayType = new TypeImpl(LIST); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(arrayType); - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = - TypeImpl::parseType(input, start, end); - if (v.size() != 1) { - throw std::logic_error("Array type must contain exactly one sub type."); - } - arrayType->addChildType(std::move(v[0].second)); - return return_value; - } - - std::unique_ptr<Type> TypeImpl::parseMapType(const std::string &input, - size_t start, - size_t end) { - TypeImpl * mapType = new TypeImpl(MAP); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(mapType); - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = - TypeImpl::parseType(input, start, end); - if (v.size() != 2) { - throw std::logic_error( - "Map type must contain exactly two sub types."); - } - mapType->addChildType(std::move(v[0].second)); - mapType->addChildType(std::move(v[1].second)); - return return_value; - } - - 
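Taken together, Type::buildTypeFromString and the parse* helpers in this hunk turn a Hive-style type string into a Type tree, and TypeImpl::toString regenerates the canonical textual form. A minimal usage sketch, assuming only the public orc/Type.hh entry points shown here (the example schema string itself is arbitrary):

#include <iostream>
#include <memory>
#include "orc/Type.hh"

int main() {
  // Parse a schema string into a Type tree (a struct with three fields).
  std::unique_ptr<orc::Type> schema = orc::Type::buildTypeFromString(
      "struct<id:bigint,name:varchar(64),price:decimal(10,2)>");
  // toString() walks the tree and prints the canonical form back out.
  std::cout << schema->toString() << std::endl;
  return 0;
}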
std::unique_ptr<Type> TypeImpl::parseStructType(const std::string &input, - size_t start, - size_t end) { - TypeImpl* structType = new TypeImpl(STRUCT); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(structType); - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type>> > v = - TypeImpl::parseType(input, start, end); - if (v.size() == 0) { - throw std::logic_error( - "Struct type must contain at least one sub type."); - } - for (size_t i = 0; i < v.size(); ++i) { - structType->addStructField(v[i].first, std::move(v[i].second)); - } - return return_value; - } - - std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string &input, - size_t start, - size_t end) { - TypeImpl* unionType = new TypeImpl(UNION); - std::unique_ptr<Type> return_value = std::unique_ptr<Type>(unionType); - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = - TypeImpl::parseType(input, start, end); - if (v.size() == 0) { - throw std::logic_error("Union type must contain at least one sub type."); - } - for (size_t i = 0; i < v.size(); ++i) { - unionType->addChildType(std::move(v[i].second)); - } - return return_value; - } - - std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string &input, - size_t start, - size_t end) { - size_t sep = input.find(',', start); - if (sep + 1 >= end || sep == std::string::npos) { - throw std::logic_error("Decimal type must specify precision and scale."); - } - uint64_t precision = - static_cast<uint64_t>(atoi(input.substr(start, sep - start).c_str())); - uint64_t scale = - static_cast<uint64_t>(atoi(input.substr(sep + 1, end - sep - 1).c_str())); - return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale)); - } - - std::unique_ptr<Type> TypeImpl::parseCategory(std::string category, - const std::string &input, - size_t start, - size_t end) { - if (category == "boolean") { - return std::unique_ptr<Type>(new TypeImpl(BOOLEAN)); - } else if (category == "tinyint") { - return std::unique_ptr<Type>(new TypeImpl(BYTE)); - } else if (category == "smallint") { - return std::unique_ptr<Type>(new TypeImpl(SHORT)); - } else if (category == "int") { - return std::unique_ptr<Type>(new TypeImpl(INT)); - } else if (category == "bigint") { - return std::unique_ptr<Type>(new TypeImpl(LONG)); - } else if (category == "float") { - return std::unique_ptr<Type>(new TypeImpl(FLOAT)); - } else if (category == "double") { - return std::unique_ptr<Type>(new TypeImpl(DOUBLE)); - } else if (category == "string") { - return std::unique_ptr<Type>(new TypeImpl(STRING)); - } else if (category == "binary") { - return std::unique_ptr<Type>(new TypeImpl(BINARY)); - } else if (category == "timestamp") { - return std::unique_ptr<Type>(new TypeImpl(TIMESTAMP)); - } else if (category == "array") { - return parseArrayType(input, start, end); - } else if (category == "map") { - return parseMapType(input, start, end); - } else if (category == "struct") { - return parseStructType(input, start, end); - } else if (category == "uniontype") { - return parseUnionType(input, start, end); - } else if (category == "decimal") { - return parseDecimalType(input, start, end); - } else if (category == "date") { - return std::unique_ptr<Type>(new TypeImpl(DATE)); - } else if (category == "varchar") { - uint64_t maxLength = static_cast<uint64_t>( - atoi(input.substr(start, end - start).c_str())); - return std::unique_ptr<Type>(new TypeImpl(VARCHAR, maxLength)); - } else if (category == "char") { - uint64_t maxLength = static_cast<uint64_t>( - atoi(input.substr(start, end - start).c_str())); - 
return std::unique_ptr<Type>(new TypeImpl(CHAR, maxLength)); - } else { - throw std::logic_error("Unknown type " + category); - } - } - - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > TypeImpl::parseType( - const std::string &input, - size_t start, - size_t end) { - std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > res; - size_t pos = start; - - while (pos < end) { - size_t endPos = pos; - while (endPos < end && (isalnum(input[endPos]) || input[endPos] == '_')) { - ++endPos; - } - - std::string fieldName; - if (input[endPos] == ':') { - fieldName = input.substr(pos, endPos - pos); - pos = ++endPos; - while (endPos < end && isalpha(input[endPos])) { - ++endPos; - } - } - - size_t nextPos = endPos + 1; - if (input[endPos] == '<') { - int count = 1; - while (nextPos < end) { - if (input[nextPos] == '<') { - ++count; - } else if (input[nextPos] == '>') { - --count; - } - if (count == 0) { - break; - } - ++nextPos; - } - if (nextPos == end) { - throw std::logic_error("Invalid type string. Cannot find closing >"); - } - } else if (input[endPos] == '(') { - while (nextPos < end && input[nextPos] != ')') { - ++nextPos; - } - if (nextPos == end) { - throw std::logic_error("Invalid type string. Cannot find closing )"); - } - } else if (input[endPos] != ',' && endPos != end) { - throw std::logic_error("Unrecognized character."); - } - - std::string category = input.substr(pos, endPos - pos); - res.push_back(std::make_pair(fieldName, parseCategory(category, input, endPos + 1, nextPos))); - - if (nextPos < end && (input[nextPos] == ')' || input[nextPos] == '>')) { - pos = nextPos + 2; - } else { - pos = nextPos; - } - } - - return res; - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "orc/Exceptions.hh" +#include "TypeImpl.hh" + +#include <iostream> +#include <sstream> + +namespace orc { + + Type::~Type() { + // PASS + } + + TypeImpl::TypeImpl(TypeKind _kind) { + parent = nullptr; + columnId = -1; + maximumColumnId = -1; + kind = _kind; + maxLength = 0; + precision = 0; + scale = 0; + subtypeCount = 0; + } + + TypeImpl::TypeImpl(TypeKind _kind, uint64_t _maxLength) { + parent = nullptr; + columnId = -1; + maximumColumnId = -1; + kind = _kind; + maxLength = _maxLength; + precision = 0; + scale = 0; + subtypeCount = 0; + } + + TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision, + uint64_t _scale) { + parent = nullptr; + columnId = -1; + maximumColumnId = -1; + kind = _kind; + maxLength = 0; + precision = _precision; + scale = _scale; + subtypeCount = 0; + } + + uint64_t TypeImpl::assignIds(uint64_t root) const { + columnId = static_cast<int64_t>(root); + uint64_t current = root + 1; + for(uint64_t i=0; i < subtypeCount; ++i) { + current = dynamic_cast<TypeImpl*>(subTypes[i])->assignIds(current); + } + maximumColumnId = static_cast<int64_t>(current) - 1; + return current; + } + + TypeImpl::~TypeImpl() { + for (std::vector<Type*>::iterator it = subTypes.begin(); + it != subTypes.end(); it++) { + delete (*it) ; + } + } + + void TypeImpl::ensureIdAssigned() const { + if (columnId == -1) { + const TypeImpl* root = this; + while (root->parent != nullptr) { + root = root->parent; + } + root->assignIds(0); + } + } + + uint64_t TypeImpl::getColumnId() const { + ensureIdAssigned(); + return static_cast<uint64_t>(columnId); + } + + uint64_t TypeImpl::getMaximumColumnId() const { + ensureIdAssigned(); + return static_cast<uint64_t>(maximumColumnId); + } + + TypeKind TypeImpl::getKind() const { + return kind; + } + + uint64_t TypeImpl::getSubtypeCount() const { + return subtypeCount; + } + + const Type* TypeImpl::getSubtype(uint64_t i) const { + return subTypes[i]; + } + + const std::string& TypeImpl::getFieldName(uint64_t i) const { + return fieldNames[i]; + } + + uint64_t TypeImpl::getMaximumLength() const { + return maxLength; + } + + uint64_t TypeImpl::getPrecision() const { + return precision; + } + + uint64_t TypeImpl::getScale() const { + return scale; + } + + void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) { + columnId = static_cast<int64_t>(_columnId); + maximumColumnId = static_cast<int64_t>(_maxColumnId); + } + + void TypeImpl::addChildType(std::unique_ptr<Type> childType) { + TypeImpl* child = dynamic_cast<TypeImpl*>(childType.release()); + subTypes.push_back(child); + if (child != nullptr) { + child->parent = this; + } + subtypeCount += 1; + } + + Type* TypeImpl::addStructField(const std::string& fieldName, + std::unique_ptr<Type> fieldType) { + addChildType(std::move(fieldType)); + fieldNames.push_back(fieldName); + return this; + } + + Type* TypeImpl::addUnionChild(std::unique_ptr<Type> fieldType) { + addChildType(std::move(fieldType)); + return this; + } + + std::string TypeImpl::toString() const { + switch (static_cast<int64_t>(kind)) { + case BOOLEAN: + return "boolean"; + case BYTE: + return "tinyint"; + case SHORT: + return "smallint"; + case INT: + return "int"; + case LONG: + return "bigint"; + case FLOAT: + return "float"; + case DOUBLE: + return "double"; + case STRING: + return "string"; + case BINARY: + return "binary"; + case TIMESTAMP: + return "timestamp"; + case LIST: + return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">"; + case MAP: + return "map<" + (subTypes[0] ? 
subTypes[0]->toString() : "void") + "," + + (subTypes[1] ? subTypes[1]->toString() : "void") + ">"; + case STRUCT: { + std::string result = "struct<"; + for(size_t i=0; i < subTypes.size(); ++i) { + if (i != 0) { + result += ","; + } + result += fieldNames[i]; + result += ":"; + result += subTypes[i]->toString(); + } + result += ">"; + return result; + } + case UNION: { + std::string result = "uniontype<"; + for(size_t i=0; i < subTypes.size(); ++i) { + if (i != 0) { + result += ","; + } + result += subTypes[i]->toString(); + } + result += ">"; + return result; + } + case DECIMAL: { + std::stringstream result; + result << "decimal(" << precision << "," << scale << ")"; + return result.str(); + } + case DATE: + return "date"; + case VARCHAR: { + std::stringstream result; + result << "varchar(" << maxLength << ")"; + return result.str(); + } + case CHAR: { + std::stringstream result; + result << "char(" << maxLength << ")"; + return result.str(); + } + default: + throw NotImplementedYet("Unknown type"); + } + } + + std::unique_ptr<ColumnVectorBatch> + TypeImpl::createRowBatch(uint64_t capacity, + MemoryPool& memoryPool, + bool encoded) const { + switch (static_cast<int64_t>(kind)) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + return std::unique_ptr<ColumnVectorBatch> + (new LongVectorBatch(capacity, memoryPool)); + + case FLOAT: + case DOUBLE: + return std::unique_ptr<ColumnVectorBatch> + (new DoubleVectorBatch(capacity, memoryPool)); + + case STRING: + case BINARY: + case CHAR: + case VARCHAR: + return encoded ? + std::unique_ptr<ColumnVectorBatch> + (new EncodedStringVectorBatch(capacity, memoryPool)) + : std::unique_ptr<ColumnVectorBatch> + (new StringVectorBatch(capacity, memoryPool)); + + case TIMESTAMP: + return std::unique_ptr<ColumnVectorBatch> + (new TimestampVectorBatch(capacity, memoryPool)); + + case STRUCT: { + StructVectorBatch *result = new StructVectorBatch(capacity, memoryPool); + std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); + for(uint64_t i=0; i < getSubtypeCount(); ++i) { + result->fields.push_back(getSubtype(i)-> + createRowBatch(capacity, + memoryPool, encoded).release()); + } + return return_value; + } + + case LIST: { + ListVectorBatch* result = new ListVectorBatch(capacity, memoryPool); + std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); + if (getSubtype(0) != nullptr) { + result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded); + } + return return_value; + } + + case MAP: { + MapVectorBatch* result = new MapVectorBatch(capacity, memoryPool); + std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); + if (getSubtype(0) != nullptr) { + result->keys = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded); + } + if (getSubtype(1) != nullptr) { + result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool, encoded); + } + return return_value; + } + + case DECIMAL: { + if (getPrecision() == 0 || getPrecision() > 18) { + return std::unique_ptr<ColumnVectorBatch> + (new Decimal128VectorBatch(capacity, memoryPool)); + } else { + return std::unique_ptr<ColumnVectorBatch> + (new Decimal64VectorBatch(capacity, memoryPool)); + } + } + + case UNION: { + UnionVectorBatch *result = new UnionVectorBatch(capacity, memoryPool); + std::unique_ptr<ColumnVectorBatch> return_value = std::unique_ptr<ColumnVectorBatch>(result); + for(uint64_t i=0; i < getSubtypeCount(); ++i) { + 
result->children.push_back(getSubtype(i)->createRowBatch(capacity, + memoryPool, encoded) + .release()); + } + return return_value; + } + + default: + throw NotImplementedYet("not supported yet"); + } + } + + std::unique_ptr<Type> createPrimitiveType(TypeKind kind) { + return std::unique_ptr<Type>(new TypeImpl(kind)); + } + + std::unique_ptr<Type> createCharType(TypeKind kind, + uint64_t maxLength) { + return std::unique_ptr<Type>(new TypeImpl(kind, maxLength)); + } + + std::unique_ptr<Type> createDecimalType(uint64_t precision, + uint64_t scale) { + return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale)); + } + + std::unique_ptr<Type> createStructType() { + return std::unique_ptr<Type>(new TypeImpl(STRUCT)); + } + + std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) { + TypeImpl* result = new TypeImpl(LIST); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); + result->addChildType(std::move(elements)); + return return_value; + } + + std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, + std::unique_ptr<Type> value) { + TypeImpl* result = new TypeImpl(MAP); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); + result->addChildType(std::move(key)); + result->addChildType(std::move(value)); + return return_value; + } + + std::unique_ptr<Type> createUnionType() { + return std::unique_ptr<Type>(new TypeImpl(UNION)); + } + + std::string printProtobufMessage(const google::protobuf::Message& message); + std::unique_ptr<Type> convertType(const proto::Type& type, + const proto::Footer& footer) { + switch (static_cast<int64_t>(type.kind())) { + + case proto::Type_Kind_BOOLEAN: + case proto::Type_Kind_BYTE: + case proto::Type_Kind_SHORT: + case proto::Type_Kind_INT: + case proto::Type_Kind_LONG: + case proto::Type_Kind_FLOAT: + case proto::Type_Kind_DOUBLE: + case proto::Type_Kind_STRING: + case proto::Type_Kind_BINARY: + case proto::Type_Kind_TIMESTAMP: + case proto::Type_Kind_DATE: + return std::unique_ptr<Type> + (new TypeImpl(static_cast<TypeKind>(type.kind()))); + + case proto::Type_Kind_CHAR: + case proto::Type_Kind_VARCHAR: + return std::unique_ptr<Type> + (new TypeImpl(static_cast<TypeKind>(type.kind()), + type.maximumlength())); + + case proto::Type_Kind_DECIMAL: + return std::unique_ptr<Type> + (new TypeImpl(DECIMAL, type.precision(), type.scale())); + + case proto::Type_Kind_LIST: + case proto::Type_Kind_MAP: + case proto::Type_Kind_UNION: { + TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind())); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); + if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) + throw ParseError("Illegal LIST type that doesn't contain one subtype"); + if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) + throw ParseError("Illegal MAP type that doesn't contain two subtypes"); + if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0) + throw ParseError("Illegal UNION type that doesn't contain any subtypes"); + for(int i=0; i < type.subtypes_size(); ++i) { + result->addUnionChild(convertType(footer.types(static_cast<int> + (type.subtypes(i))), + footer)); + } + return return_value; + } + + case proto::Type_Kind_STRUCT: { + TypeImpl* result = new TypeImpl(STRUCT); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(result); + for(int i=0; i < type.subtypes_size(); ++i) { + result->addStructField(type.fieldnames(i), + convertType(footer.types(static_cast<int> + (type.subtypes(i))), + footer)); + } + 
return return_value; + } + default: + throw NotImplementedYet("Unknown type kind"); + } + } + + /** + * Build a clone of the file type, projecting columns from the selected + * vector. This routine assumes that the parent of any selected column + * is also selected. The column ids are copied from the fileType. + * @param fileType the type in the file + * @param selected is each column by id selected + * @return a clone of the fileType filtered by the selection array + */ + std::unique_ptr<Type> buildSelectedType(const Type *fileType, + const std::vector<bool>& selected) { + if (fileType == nullptr || !selected[fileType->getColumnId()]) { + return std::unique_ptr<Type>(); + } + + TypeImpl* result; + switch (static_cast<int>(fileType->getKind())) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case STRING: + case BINARY: + case TIMESTAMP: + case DATE: + result = new TypeImpl(fileType->getKind()); + break; + + case DECIMAL: + result= new TypeImpl(fileType->getKind(), + fileType->getPrecision(), fileType->getScale()); + break; + + case VARCHAR: + case CHAR: + result = new TypeImpl(fileType->getKind(), fileType->getMaximumLength()); + break; + + case LIST: + result = new TypeImpl(fileType->getKind()); + result->addChildType(buildSelectedType(fileType->getSubtype(0), + selected)); + break; + + case MAP: + result = new TypeImpl(fileType->getKind()); + result->addChildType(buildSelectedType(fileType->getSubtype(0), + selected)); + result->addChildType(buildSelectedType(fileType->getSubtype(1), + selected)); + break; + + case STRUCT: { + result = new TypeImpl(fileType->getKind()); + for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) { + std::unique_ptr<Type> childType = + buildSelectedType(fileType->getSubtype(child), selected); + if (childType.get() != nullptr) { + result->addStructField(fileType->getFieldName(child), + std::move(childType)); + } + } + break; + } + + case UNION: { + result = new TypeImpl(fileType->getKind()); + for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) { + std::unique_ptr<Type> childType = + buildSelectedType(fileType->getSubtype(child), selected); + if (childType.get() != nullptr) { + result->addUnionChild(std::move(childType)); + } + } + break; + } + + default: + throw NotImplementedYet("Unknown type kind"); + } + result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId()); + return std::unique_ptr<Type>(result); + } + + ORC_UNIQUE_PTR<Type> Type::buildTypeFromString(const std::string& input) { + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > res = + TypeImpl::parseType(input, 0, input.size()); + if (res.size() != 1) { + throw std::logic_error("Invalid type string."); + } + return std::move(res[0].second); + } + + std::unique_ptr<Type> TypeImpl::parseArrayType(const std::string &input, + size_t start, + size_t end) { + TypeImpl* arrayType = new TypeImpl(LIST); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(arrayType); + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = + TypeImpl::parseType(input, start, end); + if (v.size() != 1) { + throw std::logic_error("Array type must contain exactly one sub type."); + } + arrayType->addChildType(std::move(v[0].second)); + return return_value; + } + + std::unique_ptr<Type> TypeImpl::parseMapType(const std::string &input, + size_t start, + size_t end) { + TypeImpl * mapType = new TypeImpl(MAP); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(mapType); + 
std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = + TypeImpl::parseType(input, start, end); + if (v.size() != 2) { + throw std::logic_error( + "Map type must contain exactly two sub types."); + } + mapType->addChildType(std::move(v[0].second)); + mapType->addChildType(std::move(v[1].second)); + return return_value; + } + + std::unique_ptr<Type> TypeImpl::parseStructType(const std::string &input, + size_t start, + size_t end) { + TypeImpl* structType = new TypeImpl(STRUCT); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(structType); + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type>> > v = + TypeImpl::parseType(input, start, end); + if (v.size() == 0) { + throw std::logic_error( + "Struct type must contain at least one sub type."); + } + for (size_t i = 0; i < v.size(); ++i) { + structType->addStructField(v[i].first, std::move(v[i].second)); + } + return return_value; + } + + std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string &input, + size_t start, + size_t end) { + TypeImpl* unionType = new TypeImpl(UNION); + std::unique_ptr<Type> return_value = std::unique_ptr<Type>(unionType); + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > v = + TypeImpl::parseType(input, start, end); + if (v.size() == 0) { + throw std::logic_error("Union type must contain at least one sub type."); + } + for (size_t i = 0; i < v.size(); ++i) { + unionType->addChildType(std::move(v[i].second)); + } + return return_value; + } + + std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string &input, + size_t start, + size_t end) { + size_t sep = input.find(',', start); + if (sep + 1 >= end || sep == std::string::npos) { + throw std::logic_error("Decimal type must specify precision and scale."); + } + uint64_t precision = + static_cast<uint64_t>(atoi(input.substr(start, sep - start).c_str())); + uint64_t scale = + static_cast<uint64_t>(atoi(input.substr(sep + 1, end - sep - 1).c_str())); + return std::unique_ptr<Type>(new TypeImpl(DECIMAL, precision, scale)); + } + + std::unique_ptr<Type> TypeImpl::parseCategory(std::string category, + const std::string &input, + size_t start, + size_t end) { + if (category == "boolean") { + return std::unique_ptr<Type>(new TypeImpl(BOOLEAN)); + } else if (category == "tinyint") { + return std::unique_ptr<Type>(new TypeImpl(BYTE)); + } else if (category == "smallint") { + return std::unique_ptr<Type>(new TypeImpl(SHORT)); + } else if (category == "int") { + return std::unique_ptr<Type>(new TypeImpl(INT)); + } else if (category == "bigint") { + return std::unique_ptr<Type>(new TypeImpl(LONG)); + } else if (category == "float") { + return std::unique_ptr<Type>(new TypeImpl(FLOAT)); + } else if (category == "double") { + return std::unique_ptr<Type>(new TypeImpl(DOUBLE)); + } else if (category == "string") { + return std::unique_ptr<Type>(new TypeImpl(STRING)); + } else if (category == "binary") { + return std::unique_ptr<Type>(new TypeImpl(BINARY)); + } else if (category == "timestamp") { + return std::unique_ptr<Type>(new TypeImpl(TIMESTAMP)); + } else if (category == "array") { + return parseArrayType(input, start, end); + } else if (category == "map") { + return parseMapType(input, start, end); + } else if (category == "struct") { + return parseStructType(input, start, end); + } else if (category == "uniontype") { + return parseUnionType(input, start, end); + } else if (category == "decimal") { + return parseDecimalType(input, start, end); + } else if (category == "date") { + return std::unique_ptr<Type>(new 
TypeImpl(DATE)); + } else if (category == "varchar") { + uint64_t maxLength = static_cast<uint64_t>( + atoi(input.substr(start, end - start).c_str())); + return std::unique_ptr<Type>(new TypeImpl(VARCHAR, maxLength)); + } else if (category == "char") { + uint64_t maxLength = static_cast<uint64_t>( + atoi(input.substr(start, end - start).c_str())); + return std::unique_ptr<Type>(new TypeImpl(CHAR, maxLength)); + } else { + throw std::logic_error("Unknown type " + category); + } + } + + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > TypeImpl::parseType( + const std::string &input, + size_t start, + size_t end) { + std::vector<std::pair<std::string, ORC_UNIQUE_PTR<Type> > > res; + size_t pos = start; + + while (pos < end) { + size_t endPos = pos; + while (endPos < end && (isalnum(input[endPos]) || input[endPos] == '_')) { + ++endPos; + } + + std::string fieldName; + if (input[endPos] == ':') { + fieldName = input.substr(pos, endPos - pos); + pos = ++endPos; + while (endPos < end && isalpha(input[endPos])) { + ++endPos; + } + } + + size_t nextPos = endPos + 1; + if (input[endPos] == '<') { + int count = 1; + while (nextPos < end) { + if (input[nextPos] == '<') { + ++count; + } else if (input[nextPos] == '>') { + --count; + } + if (count == 0) { + break; + } + ++nextPos; + } + if (nextPos == end) { + throw std::logic_error("Invalid type string. Cannot find closing >"); + } + } else if (input[endPos] == '(') { + while (nextPos < end && input[nextPos] != ')') { + ++nextPos; + } + if (nextPos == end) { + throw std::logic_error("Invalid type string. Cannot find closing )"); + } + } else if (input[endPos] != ',' && endPos != end) { + throw std::logic_error("Unrecognized character."); + } + + std::string category = input.substr(pos, endPos - pos); + res.push_back(std::make_pair(fieldName, parseCategory(category, input, endPos + 1, nextPos))); + + if (nextPos < end && (input[nextPos] == ')' || input[nextPos] == '>')) { + pos = nextPos + 2; + } else { + pos = nextPos; + } + } + + return res; + } + +} diff --git a/contrib/libs/apache/orc/c++/src/TypeImpl.hh b/contrib/libs/apache/orc/c++/src/TypeImpl.hh index cee52006b7..054ceab5dc 100644 --- a/contrib/libs/apache/orc/c++/src/TypeImpl.hh +++ b/contrib/libs/apache/orc/c++/src/TypeImpl.hh @@ -1,198 +1,198 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef TYPE_IMPL_HH -#define TYPE_IMPL_HH - -#include "orc/Type.hh" - -#include "Adaptor.hh" -#include "wrap/orc-proto-wrapper.hh" - -#include <vector> - -namespace orc { - - class TypeImpl: public Type { - private: - TypeImpl* parent; - mutable int64_t columnId; - mutable int64_t maximumColumnId; - TypeKind kind; - std::vector<Type*> subTypes; - std::vector<std::string> fieldNames; - uint64_t subtypeCount; - uint64_t maxLength; - uint64_t precision; - uint64_t scale; - - public: - /** - * Create most of the primitive types. - */ - TypeImpl(TypeKind kind); - - /** - * Create char and varchar type. - */ - TypeImpl(TypeKind kind, uint64_t maxLength); - - /** - * Create decimal type. - */ - TypeImpl(TypeKind kind, uint64_t precision, - uint64_t scale); - - virtual ~TypeImpl() override; - - uint64_t getColumnId() const override; - - uint64_t getMaximumColumnId() const override; - - TypeKind getKind() const override; - - uint64_t getSubtypeCount() const override; - - const Type* getSubtype(uint64_t i) const override; - - const std::string& getFieldName(uint64_t i) const override; - - uint64_t getMaximumLength() const override; - - uint64_t getPrecision() const override; - - uint64_t getScale() const override; - - std::string toString() const override; - - Type* addStructField(const std::string& fieldName, - std::unique_ptr<Type> fieldType) override; - Type* addUnionChild(std::unique_ptr<Type> fieldType) override; - - std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size, - MemoryPool& memoryPool, - bool encoded = false - ) const override; - - /** - * Explicitly set the column ids. Only for internal usage. - */ - void setIds(uint64_t columnId, uint64_t maxColumnId); - - /** - * Add a child type. - */ - void addChildType(std::unique_ptr<Type> childType); - - static std::vector<std::pair<std::string, std::unique_ptr<Type> > > parseType( - const std::string &input, - size_t start, - size_t end); - - private: - /** - * Assign ids to this node and its children giving this - * node rootId. - * @param rootId the column id that should be assigned to this node. - */ - uint64_t assignIds(uint64_t rootId) const; - - /** - * Ensure that ids are assigned to all of the nodes. 
- */ - void ensureIdAssigned() const; - - /** - * Parse array type from string - * @param input the input string of an array type - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseArrayType(const std::string &input, - size_t start, - size_t end); - - /** - * Parse map type from string - * @param input the input string of a map type - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseMapType(const std::string &input, - size_t start, - size_t end); - - /** - * Parse struct type from string - * @param input the input string of a struct type - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseStructType(const std::string &input, - size_t start, - size_t end); - - /** - * Parse union type from string - * @param input the input string of an union type - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseUnionType(const std::string &input, - size_t start, - size_t end); - - /** - * Parse decimal type from string - * @param input the input string of a decimal type - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseDecimalType(const std::string &input, - size_t start, - size_t end); - - /** - * Parse type for a category - * @param category type name - * @param input the input string of the category - * @param start start position of the input string - * @param end end position of the input string - */ - static std::unique_ptr<Type> parseCategory(std::string category, - const std::string &input, - size_t start, - size_t end); - }; - - std::unique_ptr<Type> convertType(const proto::Type& type, - const proto::Footer& footer); - - /** - * Build a clone of the file type, projecting columns from the selected - * vector. This routine assumes that the parent of any selected column - * is also selected. - * @param fileType the type in the file - * @param selected is each column by id selected - * @return a clone of the fileType filtered by the selection array - */ - std::unique_ptr<Type> buildSelectedType(const Type *fileType, - const std::vector<bool>& selected); -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TYPE_IMPL_HH +#define TYPE_IMPL_HH + +#include "orc/Type.hh" + +#include "Adaptor.hh" +#include "wrap/orc-proto-wrapper.hh" + +#include <vector> + +namespace orc { + + class TypeImpl: public Type { + private: + TypeImpl* parent; + mutable int64_t columnId; + mutable int64_t maximumColumnId; + TypeKind kind; + std::vector<Type*> subTypes; + std::vector<std::string> fieldNames; + uint64_t subtypeCount; + uint64_t maxLength; + uint64_t precision; + uint64_t scale; + + public: + /** + * Create most of the primitive types. + */ + TypeImpl(TypeKind kind); + + /** + * Create char and varchar type. + */ + TypeImpl(TypeKind kind, uint64_t maxLength); + + /** + * Create decimal type. + */ + TypeImpl(TypeKind kind, uint64_t precision, + uint64_t scale); + + virtual ~TypeImpl() override; + + uint64_t getColumnId() const override; + + uint64_t getMaximumColumnId() const override; + + TypeKind getKind() const override; + + uint64_t getSubtypeCount() const override; + + const Type* getSubtype(uint64_t i) const override; + + const std::string& getFieldName(uint64_t i) const override; + + uint64_t getMaximumLength() const override; + + uint64_t getPrecision() const override; + + uint64_t getScale() const override; + + std::string toString() const override; + + Type* addStructField(const std::string& fieldName, + std::unique_ptr<Type> fieldType) override; + Type* addUnionChild(std::unique_ptr<Type> fieldType) override; + + std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size, + MemoryPool& memoryPool, + bool encoded = false + ) const override; + + /** + * Explicitly set the column ids. Only for internal usage. + */ + void setIds(uint64_t columnId, uint64_t maxColumnId); + + /** + * Add a child type. + */ + void addChildType(std::unique_ptr<Type> childType); + + static std::vector<std::pair<std::string, std::unique_ptr<Type> > > parseType( + const std::string &input, + size_t start, + size_t end); + + private: + /** + * Assign ids to this node and its children giving this + * node rootId. + * @param rootId the column id that should be assigned to this node. + */ + uint64_t assignIds(uint64_t rootId) const; + + /** + * Ensure that ids are assigned to all of the nodes. 
+ */ + void ensureIdAssigned() const; + + /** + * Parse array type from string + * @param input the input string of an array type + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseArrayType(const std::string &input, + size_t start, + size_t end); + + /** + * Parse map type from string + * @param input the input string of a map type + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseMapType(const std::string &input, + size_t start, + size_t end); + + /** + * Parse struct type from string + * @param input the input string of a struct type + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseStructType(const std::string &input, + size_t start, + size_t end); + + /** + * Parse union type from string + * @param input the input string of an union type + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseUnionType(const std::string &input, + size_t start, + size_t end); + + /** + * Parse decimal type from string + * @param input the input string of a decimal type + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseDecimalType(const std::string &input, + size_t start, + size_t end); + + /** + * Parse type for a category + * @param category type name + * @param input the input string of the category + * @param start start position of the input string + * @param end end position of the input string + */ + static std::unique_ptr<Type> parseCategory(std::string category, + const std::string &input, + size_t start, + size_t end); + }; + + std::unique_ptr<Type> convertType(const proto::Type& type, + const proto::Footer& footer); + + /** + * Build a clone of the file type, projecting columns from the selected + * vector. This routine assumes that the parent of any selected column + * is also selected. + * @param fileType the type in the file + * @param selected is each column by id selected + * @return a clone of the fileType filtered by the selection array + */ + std::unique_ptr<Type> buildSelectedType(const Type *fileType, + const std::vector<bool>& selected); +} + +#endif diff --git a/contrib/libs/apache/orc/c++/src/Vector.cc b/contrib/libs/apache/orc/c++/src/Vector.cc index 6ba2f8ae7d..14c0ded030 100644 --- a/contrib/libs/apache/orc/c++/src/Vector.cc +++ b/contrib/libs/apache/orc/c++/src/Vector.cc @@ -1,518 +1,518 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/Vector.hh" - -#include "Adaptor.hh" -#include "orc/Exceptions.hh" - -#include <iostream> -#include <sstream> -#include <cstdlib> - -namespace orc { - - ColumnVectorBatch::ColumnVectorBatch(uint64_t cap, - MemoryPool& pool - ): capacity(cap), - numElements(0), - notNull(pool, cap), - hasNulls(false), - isEncoded(false), - memoryPool(pool) { - std::memset(notNull.data(), 1, capacity); - } - - ColumnVectorBatch::~ColumnVectorBatch() { - // PASS - } - - void ColumnVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - capacity = cap; - notNull.resize(cap); - } - } - - void ColumnVectorBatch::clear() { - numElements = 0; - } - - uint64_t ColumnVectorBatch::getMemoryUsage() { - return static_cast<uint64_t>(notNull.capacity() * sizeof(char)); - } - - bool ColumnVectorBatch::hasVariableLength() { - return false; - } - - LongVectorBatch::LongVectorBatch(uint64_t _capacity, MemoryPool& pool - ): ColumnVectorBatch(_capacity, pool), - data(pool, _capacity) { - // PASS - } - - LongVectorBatch::~LongVectorBatch() { - // PASS - } - - std::string LongVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Long vector <" << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void LongVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - data.resize(cap); - } - } - - void LongVectorBatch::clear() { - numElements = 0; - } - - uint64_t LongVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() + - static_cast<uint64_t>(data.capacity() * sizeof(int64_t)); - } - - DoubleVectorBatch::DoubleVectorBatch(uint64_t _capacity, MemoryPool& pool - ): ColumnVectorBatch(_capacity, pool), - data(pool, _capacity) { - // PASS - } - - DoubleVectorBatch::~DoubleVectorBatch() { - // PASS - } - - std::string DoubleVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Double vector <" << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void DoubleVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - data.resize(cap); - } - } - - void DoubleVectorBatch::clear() { - numElements = 0; - } - - uint64_t DoubleVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(data.capacity() * sizeof(double)); - } - - StringDictionary::StringDictionary(MemoryPool& pool) - : dictionaryBlob(pool), - dictionaryOffset(pool) { - // PASS - } - - EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity, - MemoryPool& pool) - : StringVectorBatch(_capacity, pool), - dictionary(), - index(pool, _capacity) { - // PASS - } - - EncodedStringVectorBatch::~EncodedStringVectorBatch() { - // PASS - } - - std::string EncodedStringVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Encoded string vector <" << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool - ): ColumnVectorBatch(_capacity, pool), - data(pool, _capacity), - length(pool, _capacity), - blob(pool) { - // PASS - } - - StringVectorBatch::~StringVectorBatch() { - // PASS - } - - std::string StringVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Byte vector <" << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void StringVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - data.resize(cap); - length.resize(cap); - } - } - - 
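The batch classes in this file pair capacity-sized data buffers with a numElements count maintained by the reader or writer, and TypeImpl::createRowBatch wires one batch per schema node. A short sketch of allocating and inspecting such a batch, assuming the public orc/Type.hh, orc/Vector.hh and orc/MemoryPool.hh headers (orc::getDefaultPool() comes from the MemoryPool header, not from this hunk):

#include <iostream>
#include <memory>
#include "orc/MemoryPool.hh"
#include "orc/Type.hh"
#include "orc/Vector.hh"

int main() {
  // Build a struct<x:bigint,s:string> schema with the factory functions.
  std::unique_ptr<orc::Type> schema = orc::createStructType();
  schema->addStructField("x", orc::createPrimitiveType(orc::LONG));
  schema->addStructField("s", orc::createPrimitiveType(orc::STRING));

  // The resulting StructVectorBatch owns a LongVectorBatch and a StringVectorBatch.
  std::unique_ptr<orc::ColumnVectorBatch> batch =
      schema->createRowBatch(1024, *orc::getDefaultPool());
  std::cout << batch->toString() << std::endl;
  std::cout << "approximate heap bytes: " << batch->getMemoryUsage() << std::endl;
  return 0;
}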
void StringVectorBatch::clear() { - numElements = 0; - } - - uint64_t StringVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(data.capacity() * sizeof(char*) - + length.capacity() * sizeof(int64_t)); - } - - StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool) { - // PASS - } - - StructVectorBatch::~StructVectorBatch() { - for (uint64_t i=0; i<this->fields.size(); i++) { - delete this->fields[i]; - } - } - - std::string StructVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Struct vector <" << numElements << " of " << capacity - << "; "; - for(std::vector<ColumnVectorBatch*>::const_iterator ptr=fields.begin(); - ptr != fields.end(); ++ptr) { - buffer << (*ptr)->toString() << "; "; - } - buffer << ">"; - return buffer.str(); - } - - void StructVectorBatch::resize(uint64_t cap) { - ColumnVectorBatch::resize(cap); - } - - void StructVectorBatch::clear() { - for(size_t i=0; i < fields.size(); i++) { - fields[i]->clear(); - } - numElements = 0; - } - - uint64_t StructVectorBatch::getMemoryUsage() { - uint64_t memory = ColumnVectorBatch::getMemoryUsage(); - for (unsigned int i=0; i < fields.size(); i++) { - memory += fields[i]->getMemoryUsage(); - } - return memory; - } - - bool StructVectorBatch::hasVariableLength() { - for (unsigned int i=0; i < fields.size(); i++) { - if (fields[i]->hasVariableLength()) { - return true; - } - } - return false; - } - - ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool), - offsets(pool, cap+1) { - // PASS - } - - ListVectorBatch::~ListVectorBatch() { - // PASS - } - - std::string ListVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "List vector <" << elements->toString() << " with " - << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void ListVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - offsets.resize(cap + 1); - } - } - - void ListVectorBatch::clear() { - numElements = 0; - elements->clear(); - } - - uint64_t ListVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) - + elements->getMemoryUsage(); - } - - bool ListVectorBatch::hasVariableLength() { - return true; - } - - MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool), - offsets(pool, cap+1) { - // PASS - } - - MapVectorBatch::~MapVectorBatch() { - // PASS - } - - std::string MapVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Map vector <" << keys->toString() << ", " - << elements->toString() << " with " - << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void MapVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - offsets.resize(cap + 1); - } - } - - void MapVectorBatch::clear() { - keys->clear(); - elements->clear(); - numElements = 0; - } - - uint64_t MapVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) - + keys->getMemoryUsage() - + elements->getMemoryUsage(); - } - - bool MapVectorBatch::hasVariableLength() { - return true; - } - - UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool), - tags(pool, cap), - offsets(pool, cap) { - // PASS - } - - 
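ListVectorBatch and MapVectorBatch above allocate capacity+1 offsets so that, by the usual ORC convention, the children of row i occupy positions [offsets[i], offsets[i+1]) of the nested batch. A hedged sketch of filling a list column under that assumed convention, with two rows [1,2,3] and [4], again relying on the public headers and orc::getDefaultPool():

#include <memory>
#include "orc/MemoryPool.hh"
#include "orc/Type.hh"
#include "orc/Vector.hh"

int main() {
  std::unique_ptr<orc::Type> schema =
      orc::createListType(orc::createPrimitiveType(orc::LONG));
  std::unique_ptr<orc::ColumnVectorBatch> root =
      schema->createRowBatch(8, *orc::getDefaultPool());

  orc::ListVectorBatch& list = dynamic_cast<orc::ListVectorBatch&>(*root);
  orc::LongVectorBatch& elems =
      dynamic_cast<orc::LongVectorBatch&>(*list.elements);

  // Row 0 holds elements 0..2, row 1 holds element 3.
  list.offsets[0] = 0;
  list.offsets[1] = 3;
  list.offsets[2] = 4;
  for (int64_t i = 0; i < 4; ++i) {
    elems.data[i] = i + 1;
  }
  list.numElements = 2;
  elems.numElements = 4;
  return 0;
}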
UnionVectorBatch::~UnionVectorBatch() { - for (uint64_t i=0; i < children.size(); i++) { - delete children[i]; - } - } - - std::string UnionVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Union vector <"; - for(size_t i=0; i < children.size(); ++i) { - if (i != 0) { - buffer << ", "; - } - buffer << children[i]->toString(); - } - buffer << "; with " << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void UnionVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - tags.resize(cap); - offsets.resize(cap); - } - } - - void UnionVectorBatch::clear() { - for(size_t i=0; i < children.size(); i++) { - children[i]->clear(); - } - numElements = 0; - } - - uint64_t UnionVectorBatch::getMemoryUsage() { - uint64_t memory = ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char) - + offsets.capacity() * sizeof(uint64_t)); - for(size_t i=0; i < children.size(); ++i) { - memory += children[i]->getMemoryUsage(); - } - return memory; - } - - bool UnionVectorBatch::hasVariableLength() { - for(size_t i=0; i < children.size(); ++i) { - if (children[i]->hasVariableLength()) { - return true; - } - } - return false; - } - - Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool), - precision(0), - scale(0), - values(pool, cap), - readScales(pool, cap) { - // PASS - } - - Decimal64VectorBatch::~Decimal64VectorBatch() { - // PASS - } - - std::string Decimal64VectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Decimal64 vector with " - << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void Decimal64VectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - values.resize(cap); - readScales.resize(cap); - } - } - - void Decimal64VectorBatch::clear() { - numElements = 0; - } - - uint64_t Decimal64VectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>( - (values.capacity() + readScales.capacity()) * sizeof(int64_t)); - } - - Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool - ): ColumnVectorBatch(cap, pool), - precision(0), - scale(0), - values(pool, cap), - readScales(pool, cap) { - // PASS - } - - Decimal128VectorBatch::~Decimal128VectorBatch() { - // PASS - } - - std::string Decimal128VectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Decimal128 vector with " - << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void Decimal128VectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - values.resize(cap); - readScales.resize(cap); - } - } - - void Decimal128VectorBatch::clear() { - numElements = 0; - } - - uint64_t Decimal128VectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>(values.capacity() * sizeof(Int128) - + readScales.capacity() * sizeof(int64_t)); - } - - Decimal::Decimal(const Int128& _value, - int32_t _scale): value(_value), scale(_scale) { - // PASS - } - - Decimal::Decimal(const std::string& str) { - std::size_t foundPoint = str.find("."); - // no decimal point, it is int - if(foundPoint == std::string::npos){ - value = Int128(str); - scale = 0; - }else{ - std::string copy(str); - scale = static_cast<int32_t>(str.length() - foundPoint - 1); - value = Int128(copy.replace(foundPoint, 1, "")); - } - } - - Decimal::Decimal() : 
value(0), scale(0) { - // PASS - } - - std::string Decimal::toString() const { - return value.toDecimalString(scale); - } - - TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity, - MemoryPool& pool - ): ColumnVectorBatch(_capacity, - pool), - data(pool, _capacity), - nanoseconds(pool, _capacity) { - // PASS - } - - TimestampVectorBatch::~TimestampVectorBatch() { - // PASS - } - - std::string TimestampVectorBatch::toString() const { - std::ostringstream buffer; - buffer << "Timestamp vector <" << numElements << " of " << capacity << ">"; - return buffer.str(); - } - - void TimestampVectorBatch::resize(uint64_t cap) { - if (capacity < cap) { - ColumnVectorBatch::resize(cap); - data.resize(cap); - nanoseconds.resize(cap); - } - } - - void TimestampVectorBatch::clear() { - numElements = 0; - } - - uint64_t TimestampVectorBatch::getMemoryUsage() { - return ColumnVectorBatch::getMemoryUsage() - + static_cast<uint64_t>( - (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t)); - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Vector.hh" + +#include "Adaptor.hh" +#include "orc/Exceptions.hh" + +#include <iostream> +#include <sstream> +#include <cstdlib> + +namespace orc { + + ColumnVectorBatch::ColumnVectorBatch(uint64_t cap, + MemoryPool& pool + ): capacity(cap), + numElements(0), + notNull(pool, cap), + hasNulls(false), + isEncoded(false), + memoryPool(pool) { + std::memset(notNull.data(), 1, capacity); + } + + ColumnVectorBatch::~ColumnVectorBatch() { + // PASS + } + + void ColumnVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + capacity = cap; + notNull.resize(cap); + } + } + + void ColumnVectorBatch::clear() { + numElements = 0; + } + + uint64_t ColumnVectorBatch::getMemoryUsage() { + return static_cast<uint64_t>(notNull.capacity() * sizeof(char)); + } + + bool ColumnVectorBatch::hasVariableLength() { + return false; + } + + LongVectorBatch::LongVectorBatch(uint64_t _capacity, MemoryPool& pool + ): ColumnVectorBatch(_capacity, pool), + data(pool, _capacity) { + // PASS + } + + LongVectorBatch::~LongVectorBatch() { + // PASS + } + + std::string LongVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Long vector <" << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void LongVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + data.resize(cap); + } + } + + void LongVectorBatch::clear() { + numElements = 0; + } + + uint64_t LongVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(data.capacity() * sizeof(int64_t)); + } + + DoubleVectorBatch::DoubleVectorBatch(uint64_t _capacity, MemoryPool& pool + ): ColumnVectorBatch(_capacity, pool), + data(pool, _capacity) { + // PASS + } + + DoubleVectorBatch::~DoubleVectorBatch() { + // PASS + } + + std::string DoubleVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Double vector <" << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void DoubleVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + data.resize(cap); + } + } + + void DoubleVectorBatch::clear() { + numElements = 0; + } + + uint64_t DoubleVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(data.capacity() * sizeof(double)); + } + + StringDictionary::StringDictionary(MemoryPool& pool) + : dictionaryBlob(pool), + dictionaryOffset(pool) { + // PASS + } + + EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity, + MemoryPool& pool) + : StringVectorBatch(_capacity, pool), + dictionary(), + index(pool, _capacity) { + // PASS + } + + EncodedStringVectorBatch::~EncodedStringVectorBatch() { + // PASS + } + + std::string EncodedStringVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Encoded string vector <" << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool + ): ColumnVectorBatch(_capacity, pool), + data(pool, _capacity), + length(pool, _capacity), + blob(pool) { + // PASS + } + + StringVectorBatch::~StringVectorBatch() { + // PASS + } + + std::string StringVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Byte vector <" << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void StringVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + data.resize(cap); + length.resize(cap); + } + } + + 
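For EncodedStringVectorBatch, index[i] holds a dictionary id per row rather than a pointer into the blob. Resolving an id back to text depends on the StringDictionary layout, which this file leaves implicit; the sketch below assumes the conventional blob-plus-offsets layout (dictionaryOffset[k] marks where entry k starts, with one trailing offset for the end) and is illustrative only.

    #include <string>
    #include "orc/Vector.hh"

    // Assumed layout: entry k occupies
    // dictionaryBlob[dictionaryOffset[k] .. dictionaryOffset[k+1]).
    std::string dictionaryValueAt(orc::EncodedStringVectorBatch& batch,
                                  uint64_t row) {
      orc::StringDictionary& dict = *batch.dictionary;
      int64_t id    = batch.index[row];
      int64_t start = dict.dictionaryOffset[id];
      int64_t end   = dict.dictionaryOffset[id + 1];
      return std::string(dict.dictionaryBlob.data() + start,
                         static_cast<size_t>(end - start));
    }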
void StringVectorBatch::clear() { + numElements = 0; + } + + uint64_t StringVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(data.capacity() * sizeof(char*) + + length.capacity() * sizeof(int64_t)); + } + + StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool) { + // PASS + } + + StructVectorBatch::~StructVectorBatch() { + for (uint64_t i=0; i<this->fields.size(); i++) { + delete this->fields[i]; + } + } + + std::string StructVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Struct vector <" << numElements << " of " << capacity + << "; "; + for(std::vector<ColumnVectorBatch*>::const_iterator ptr=fields.begin(); + ptr != fields.end(); ++ptr) { + buffer << (*ptr)->toString() << "; "; + } + buffer << ">"; + return buffer.str(); + } + + void StructVectorBatch::resize(uint64_t cap) { + ColumnVectorBatch::resize(cap); + } + + void StructVectorBatch::clear() { + for(size_t i=0; i < fields.size(); i++) { + fields[i]->clear(); + } + numElements = 0; + } + + uint64_t StructVectorBatch::getMemoryUsage() { + uint64_t memory = ColumnVectorBatch::getMemoryUsage(); + for (unsigned int i=0; i < fields.size(); i++) { + memory += fields[i]->getMemoryUsage(); + } + return memory; + } + + bool StructVectorBatch::hasVariableLength() { + for (unsigned int i=0; i < fields.size(); i++) { + if (fields[i]->hasVariableLength()) { + return true; + } + } + return false; + } + + ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool), + offsets(pool, cap+1) { + // PASS + } + + ListVectorBatch::~ListVectorBatch() { + // PASS + } + + std::string ListVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "List vector <" << elements->toString() << " with " + << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void ListVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + offsets.resize(cap + 1); + } + } + + void ListVectorBatch::clear() { + numElements = 0; + elements->clear(); + } + + uint64_t ListVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) + + elements->getMemoryUsage(); + } + + bool ListVectorBatch::hasVariableLength() { + return true; + } + + MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool), + offsets(pool, cap+1) { + // PASS + } + + MapVectorBatch::~MapVectorBatch() { + // PASS + } + + std::string MapVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Map vector <" << keys->toString() << ", " + << elements->toString() << " with " + << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void MapVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + offsets.resize(cap + 1); + } + } + + void MapVectorBatch::clear() { + keys->clear(); + elements->clear(); + numElements = 0; + } + + uint64_t MapVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) + + keys->getMemoryUsage() + + elements->getMemoryUsage(); + } + + bool MapVectorBatch::hasVariableLength() { + return true; + } + + UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool), + tags(pool, cap), + offsets(pool, cap) { + // PASS + } + + 
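For the UnionVectorBatch constructed just above, the two parallel buffers describe each row indirectly: tags[i] selects which child batch holds row i's value and offsets[i] is the position inside that child. A sketch, assuming the children vector has already been wired up by the type's createRowBatch so that child 0 is a LongVectorBatch and child 1 a DoubleVectorBatch; the values are illustrative.

    #include "orc/Vector.hh"

    // Row 0 carries a long, row 1 a double.
    void fillSampleUnion(orc::UnionVectorBatch& batch) {
      orc::LongVectorBatch& asLong =
          dynamic_cast<orc::LongVectorBatch&>(*batch.children[0]);
      orc::DoubleVectorBatch& asDouble =
          dynamic_cast<orc::DoubleVectorBatch&>(*batch.children[1]);
      batch.tags[0] = 0;  batch.offsets[0] = 0;  asLong.data[0]   = 10;
      batch.tags[1] = 1;  batch.offsets[1] = 0;  asDouble.data[0] = 2.5;
      asLong.numElements = 1;
      asDouble.numElements = 1;
      batch.numElements = 2;
    }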
UnionVectorBatch::~UnionVectorBatch() { + for (uint64_t i=0; i < children.size(); i++) { + delete children[i]; + } + } + + std::string UnionVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Union vector <"; + for(size_t i=0; i < children.size(); ++i) { + if (i != 0) { + buffer << ", "; + } + buffer << children[i]->toString(); + } + buffer << "; with " << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void UnionVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + tags.resize(cap); + offsets.resize(cap); + } + } + + void UnionVectorBatch::clear() { + for(size_t i=0; i < children.size(); i++) { + children[i]->clear(); + } + numElements = 0; + } + + uint64_t UnionVectorBatch::getMemoryUsage() { + uint64_t memory = ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char) + + offsets.capacity() * sizeof(uint64_t)); + for(size_t i=0; i < children.size(); ++i) { + memory += children[i]->getMemoryUsage(); + } + return memory; + } + + bool UnionVectorBatch::hasVariableLength() { + for(size_t i=0; i < children.size(); ++i) { + if (children[i]->hasVariableLength()) { + return true; + } + } + return false; + } + + Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool), + precision(0), + scale(0), + values(pool, cap), + readScales(pool, cap) { + // PASS + } + + Decimal64VectorBatch::~Decimal64VectorBatch() { + // PASS + } + + std::string Decimal64VectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Decimal64 vector with " + << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void Decimal64VectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + values.resize(cap); + readScales.resize(cap); + } + } + + void Decimal64VectorBatch::clear() { + numElements = 0; + } + + uint64_t Decimal64VectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>( + (values.capacity() + readScales.capacity()) * sizeof(int64_t)); + } + + Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool + ): ColumnVectorBatch(cap, pool), + precision(0), + scale(0), + values(pool, cap), + readScales(pool, cap) { + // PASS + } + + Decimal128VectorBatch::~Decimal128VectorBatch() { + // PASS + } + + std::string Decimal128VectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Decimal128 vector with " + << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void Decimal128VectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + values.resize(cap); + readScales.resize(cap); + } + } + + void Decimal128VectorBatch::clear() { + numElements = 0; + } + + uint64_t Decimal128VectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>(values.capacity() * sizeof(Int128) + + readScales.capacity() * sizeof(int64_t)); + } + + Decimal::Decimal(const Int128& _value, + int32_t _scale): value(_value), scale(_scale) { + // PASS + } + + Decimal::Decimal(const std::string& str) { + std::size_t foundPoint = str.find("."); + // no decimal point, it is int + if(foundPoint == std::string::npos){ + value = Int128(str); + scale = 0; + }else{ + std::string copy(str); + scale = static_cast<int32_t>(str.length() - foundPoint - 1); + value = Int128(copy.replace(foundPoint, 1, "")); + } + } + + Decimal::Decimal() : 
value(0), scale(0) { + // PASS + } + + std::string Decimal::toString() const { + return value.toDecimalString(scale); + } + + TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity, + MemoryPool& pool + ): ColumnVectorBatch(_capacity, + pool), + data(pool, _capacity), + nanoseconds(pool, _capacity) { + // PASS + } + + TimestampVectorBatch::~TimestampVectorBatch() { + // PASS + } + + std::string TimestampVectorBatch::toString() const { + std::ostringstream buffer; + buffer << "Timestamp vector <" << numElements << " of " << capacity << ">"; + return buffer.str(); + } + + void TimestampVectorBatch::resize(uint64_t cap) { + if (capacity < cap) { + ColumnVectorBatch::resize(cap); + data.resize(cap); + nanoseconds.resize(cap); + } + } + + void TimestampVectorBatch::clear() { + numElements = 0; + } + + uint64_t TimestampVectorBatch::getMemoryUsage() { + return ColumnVectorBatch::getMemoryUsage() + + static_cast<uint64_t>( + (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t)); + } +} diff --git a/contrib/libs/apache/orc/c++/src/Writer.cc b/contrib/libs/apache/orc/c++/src/Writer.cc index 8b13750865..b5bd19b304 100644 --- a/contrib/libs/apache/orc/c++/src/Writer.cc +++ b/contrib/libs/apache/orc/c++/src/Writer.cc @@ -1,641 +1,641 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/Common.hh" -#include "orc/OrcFile.hh" - -#include "ColumnWriter.hh" -#include "Timezone.hh" - -#include <memory> - -namespace orc { - - struct WriterOptionsPrivate { - uint64_t stripeSize; - uint64_t compressionBlockSize; - uint64_t rowIndexStride; - CompressionKind compression; - CompressionStrategy compressionStrategy; - MemoryPool* memoryPool; - double paddingTolerance; - std::ostream* errorStream; - FileVersion fileVersion; - double dictionaryKeySizeThreshold; - bool enableIndex; - std::set<uint64_t> columnsUseBloomFilter; - double bloomFilterFalsePositiveProb; - BloomFilterVersion bloomFilterVersion; - - WriterOptionsPrivate() : - fileVersion(FileVersion::v_0_12()) { // default to Hive_0_12 - stripeSize = 64 * 1024 * 1024; // 64M - compressionBlockSize = 64 * 1024; // 64K - rowIndexStride = 10000; - compression = CompressionKind_ZLIB; - compressionStrategy = CompressionStrategy_SPEED; - memoryPool = getDefaultPool(); - paddingTolerance = 0.0; - errorStream = &std::cerr; - dictionaryKeySizeThreshold = 0.0; - enableIndex = true; - bloomFilterFalsePositiveProb = 0.05; - bloomFilterVersion = UTF8; - } - }; - - WriterOptions::WriterOptions(): - privateBits(std::unique_ptr<WriterOptionsPrivate> - (new WriterOptionsPrivate())) { - // PASS - } - - WriterOptions::WriterOptions(const WriterOptions& rhs): - privateBits(std::unique_ptr<WriterOptionsPrivate> - (new WriterOptionsPrivate(*(rhs.privateBits.get())))) { - // PASS - } - - WriterOptions::WriterOptions(WriterOptions& rhs) { - // swap privateBits with rhs - WriterOptionsPrivate* l = privateBits.release(); - privateBits.reset(rhs.privateBits.release()); - rhs.privateBits.reset(l); - } - - WriterOptions& WriterOptions::operator=(const WriterOptions& rhs) { - if (this != &rhs) { - privateBits.reset(new WriterOptionsPrivate(*(rhs.privateBits.get()))); - } - return *this; - } - - WriterOptions::~WriterOptions() { - // PASS - } - RleVersion WriterOptions::getRleVersion() const { - if(privateBits->fileVersion == FileVersion::v_0_11()) - { - return RleVersion_1; - } - - return RleVersion_2; - } - - WriterOptions& WriterOptions::setStripeSize(uint64_t size) { - privateBits->stripeSize = size; - return *this; - } - - uint64_t WriterOptions::getStripeSize() const { - return privateBits->stripeSize; - } - - WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) { - privateBits->compressionBlockSize = size; - return *this; - } - - uint64_t WriterOptions::getCompressionBlockSize() const { - return privateBits->compressionBlockSize; - } - - WriterOptions& WriterOptions::setRowIndexStride(uint64_t stride) { - privateBits->rowIndexStride = stride; - privateBits->enableIndex = (stride != 0); - return *this; - } - - uint64_t WriterOptions::getRowIndexStride() const { - return privateBits->rowIndexStride; - } - - WriterOptions& WriterOptions::setDictionaryKeySizeThreshold(double val) { - privateBits->dictionaryKeySizeThreshold = val; - return *this; - } - - double WriterOptions::getDictionaryKeySizeThreshold() const { - return privateBits->dictionaryKeySizeThreshold; - } - - WriterOptions& WriterOptions::setFileVersion(const FileVersion& version) { - // Only Hive_0_11 and Hive_0_12 version are supported currently - if (version.getMajor() == 0 && (version.getMinor() == 11 || version.getMinor() == 12)) { - privateBits->fileVersion = version; - return *this; - } - throw std::logic_error("Unsupported file version specified."); - } - - FileVersion WriterOptions::getFileVersion() const { - return privateBits->fileVersion; - } - - 
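The setters above are simple fluent mutators over WriterOptionsPrivate, so a writer is normally configured in one chain before being handed to createWriter (defined later in this file). A sketch against the API shown here; the figures are illustrative, not recommendations.

    #include "orc/OrcFile.hh"

    orc::WriterOptions makeOptions() {
      orc::WriterOptions opts;
      opts.setStripeSize(128 * 1024 * 1024)         // default is 64M
          .setCompressionBlockSize(256 * 1024)      // default is 64K
          .setCompression(orc::CompressionKind_ZLIB)
          .setRowIndexStride(10000)                 // 0 would disable the row index
          .setDictionaryKeySizeThreshold(0.8);      // > 0 enables dictionary encoding
      return opts;
    }

From there the flow shown in WriterImpl applies: createWriter(type, outputStream, opts) with outputStream an OutputStream*, createRowBatch() to obtain batches, add() per filled batch, and close() to flush the final stripe, metadata, file footer and postscript.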
WriterOptions& WriterOptions::setCompression(CompressionKind comp) { - privateBits->compression = comp; - return *this; - } - - CompressionKind WriterOptions::getCompression() const { - return privateBits->compression; - } - - WriterOptions& WriterOptions::setCompressionStrategy( - CompressionStrategy strategy) { - privateBits->compressionStrategy = strategy; - return *this; - } - - CompressionStrategy WriterOptions::getCompressionStrategy() const { - return privateBits->compressionStrategy; - } - - bool WriterOptions::getAlignedBitpacking() const { - return privateBits->compressionStrategy == CompressionStrategy ::CompressionStrategy_SPEED; - } - - WriterOptions& WriterOptions::setPaddingTolerance(double tolerance) { - privateBits->paddingTolerance = tolerance; - return *this; - } - - double WriterOptions::getPaddingTolerance() const { - return privateBits->paddingTolerance; - } - - WriterOptions& WriterOptions::setMemoryPool(MemoryPool* memoryPool) { - privateBits->memoryPool = memoryPool; - return *this; - } - - MemoryPool* WriterOptions::getMemoryPool() const { - return privateBits->memoryPool; - } - - WriterOptions& WriterOptions::setErrorStream(std::ostream& errStream) { - privateBits->errorStream = &errStream; - return *this; - } - - std::ostream* WriterOptions::getErrorStream() const { - return privateBits->errorStream; - } - - bool WriterOptions::getEnableIndex() const { - return privateBits->enableIndex; - } - - bool WriterOptions::getEnableDictionary() const { - return privateBits->dictionaryKeySizeThreshold > 0.0; - } - - WriterOptions& WriterOptions::setColumnsUseBloomFilter( - const std::set<uint64_t>& columns) { - privateBits->columnsUseBloomFilter = columns; - return *this; - } - - bool WriterOptions::isColumnUseBloomFilter(uint64_t column) const { - return privateBits->columnsUseBloomFilter.find(column) != - privateBits->columnsUseBloomFilter.end(); - } - - WriterOptions& WriterOptions::setBloomFilterFPP(double fpp) { - privateBits->bloomFilterFalsePositiveProb = fpp; - return *this; - } - - double WriterOptions::getBloomFilterFPP() const { - return privateBits->bloomFilterFalsePositiveProb; - } - - // delibrately not provide setter to write bloom filter version because - // we only support UTF8 for now. 
- BloomFilterVersion WriterOptions::getBloomFilterVersion() const { - return privateBits->bloomFilterVersion; - } - - Writer::~Writer() { - // PASS - } - - class WriterImpl : public Writer { - private: - std::unique_ptr<ColumnWriter> columnWriter; - std::unique_ptr<BufferedOutputStream> compressionStream; - std::unique_ptr<BufferedOutputStream> bufferedStream; - std::unique_ptr<StreamsFactory> streamsFactory; - OutputStream* outStream; - WriterOptions options; - const Type& type; - uint64_t stripeRows, totalRows, indexRows; - uint64_t currentOffset; - proto::Footer fileFooter; - proto::PostScript postScript; - proto::StripeInformation stripeInfo; - proto::Metadata metadata; - - static const char* magicId; - static const WriterId writerId; - - public: - WriterImpl( - const Type& type, - OutputStream* stream, - const WriterOptions& options); - - std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) - const override; - - void add(ColumnVectorBatch& rowsToAdd) override; - - void close() override; - - void addUserMetadata(const std::string name, const std::string value) override; - - private: - void init(); - void initStripe(); - void writeStripe(); - void writeMetadata(); - void writeFileFooter(); - void writePostscript(); - void buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index); - static proto::CompressionKind convertCompressionKind( - const CompressionKind& kind); - }; - - const char * WriterImpl::magicId = "ORC"; - - const WriterId WriterImpl::writerId = WriterId::ORC_CPP_WRITER; - - WriterImpl::WriterImpl( - const Type& t, - OutputStream* stream, - const WriterOptions& opts) : - outStream(stream), - options(opts), - type(t) { - streamsFactory = createStreamsFactory(options, outStream); - columnWriter = buildWriter(type, *streamsFactory, options); - stripeRows = totalRows = indexRows = 0; - currentOffset = 0; - - // compression stream for stripe footer, file footer and metadata - compressionStream = createCompressor( - options.getCompression(), - outStream, - options.getCompressionStrategy(), - 1 * 1024 * 1024, // buffer capacity: 1M - options.getCompressionBlockSize(), - *options.getMemoryPool()); - - // uncompressed stream for post script - bufferedStream.reset(new BufferedOutputStream( - *options.getMemoryPool(), - outStream, - 1024, // buffer capacity: 1024 bytes - options.getCompressionBlockSize())); - - init(); - } - - std::unique_ptr<ColumnVectorBatch> WriterImpl::createRowBatch(uint64_t size) - const { - return type.createRowBatch(size, *options.getMemoryPool()); - } - - void WriterImpl::add(ColumnVectorBatch& rowsToAdd) { - if (options.getEnableIndex()) { - uint64_t pos = 0; - uint64_t chunkSize = 0; - uint64_t rowIndexStride = options.getRowIndexStride(); - while (pos < rowsToAdd.numElements) { - chunkSize = std::min(rowsToAdd.numElements - pos, - rowIndexStride - indexRows); - columnWriter->add(rowsToAdd, pos, chunkSize, nullptr); - - pos += chunkSize; - indexRows += chunkSize; - stripeRows += chunkSize; - - if (indexRows >= rowIndexStride) { - columnWriter->createRowIndexEntry(); - indexRows = 0; - } - } - } else { - stripeRows += rowsToAdd.numElements; - columnWriter->add(rowsToAdd, 0, rowsToAdd.numElements, nullptr); - } - - if (columnWriter->getEstimatedSize() >= options.getStripeSize()) { - writeStripe(); - } - } - - void WriterImpl::close() { - if (stripeRows > 0) { - writeStripe(); - } - writeMetadata(); - writeFileFooter(); - writePostscript(); - outStream->close(); - } - - void WriterImpl::addUserMetadata(const std::string name, const 
std::string value){ - proto::UserMetadataItem* userMetadataItem = fileFooter.add_metadata(); - userMetadataItem->set_name(TString(name)); - userMetadataItem->set_value(TString(value)); - } - - void WriterImpl::init() { - // Write file header - const static size_t magicIdLength = strlen(WriterImpl::magicId); - outStream->write(WriterImpl::magicId, magicIdLength); - currentOffset += magicIdLength; - - // Initialize file footer - fileFooter.set_headerlength(currentOffset); - fileFooter.set_contentlength(0); - fileFooter.set_numberofrows(0); - fileFooter.set_rowindexstride( - static_cast<uint32_t>(options.getRowIndexStride())); - fileFooter.set_writer(writerId); +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/Common.hh" +#include "orc/OrcFile.hh" + +#include "ColumnWriter.hh" +#include "Timezone.hh" + +#include <memory> + +namespace orc { + + struct WriterOptionsPrivate { + uint64_t stripeSize; + uint64_t compressionBlockSize; + uint64_t rowIndexStride; + CompressionKind compression; + CompressionStrategy compressionStrategy; + MemoryPool* memoryPool; + double paddingTolerance; + std::ostream* errorStream; + FileVersion fileVersion; + double dictionaryKeySizeThreshold; + bool enableIndex; + std::set<uint64_t> columnsUseBloomFilter; + double bloomFilterFalsePositiveProb; + BloomFilterVersion bloomFilterVersion; + + WriterOptionsPrivate() : + fileVersion(FileVersion::v_0_12()) { // default to Hive_0_12 + stripeSize = 64 * 1024 * 1024; // 64M + compressionBlockSize = 64 * 1024; // 64K + rowIndexStride = 10000; + compression = CompressionKind_ZLIB; + compressionStrategy = CompressionStrategy_SPEED; + memoryPool = getDefaultPool(); + paddingTolerance = 0.0; + errorStream = &std::cerr; + dictionaryKeySizeThreshold = 0.0; + enableIndex = true; + bloomFilterFalsePositiveProb = 0.05; + bloomFilterVersion = UTF8; + } + }; + + WriterOptions::WriterOptions(): + privateBits(std::unique_ptr<WriterOptionsPrivate> + (new WriterOptionsPrivate())) { + // PASS + } + + WriterOptions::WriterOptions(const WriterOptions& rhs): + privateBits(std::unique_ptr<WriterOptionsPrivate> + (new WriterOptionsPrivate(*(rhs.privateBits.get())))) { + // PASS + } + + WriterOptions::WriterOptions(WriterOptions& rhs) { + // swap privateBits with rhs + WriterOptionsPrivate* l = privateBits.release(); + privateBits.reset(rhs.privateBits.release()); + rhs.privateBits.reset(l); + } + + WriterOptions& WriterOptions::operator=(const WriterOptions& rhs) { + if (this != &rhs) { + privateBits.reset(new WriterOptionsPrivate(*(rhs.privateBits.get()))); + } + return *this; + } + + WriterOptions::~WriterOptions() { + // PASS + } + RleVersion WriterOptions::getRleVersion() const { + if(privateBits->fileVersion == FileVersion::v_0_11()) + { + return RleVersion_1; + } + + 
return RleVersion_2; + } + + WriterOptions& WriterOptions::setStripeSize(uint64_t size) { + privateBits->stripeSize = size; + return *this; + } + + uint64_t WriterOptions::getStripeSize() const { + return privateBits->stripeSize; + } + + WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) { + privateBits->compressionBlockSize = size; + return *this; + } + + uint64_t WriterOptions::getCompressionBlockSize() const { + return privateBits->compressionBlockSize; + } + + WriterOptions& WriterOptions::setRowIndexStride(uint64_t stride) { + privateBits->rowIndexStride = stride; + privateBits->enableIndex = (stride != 0); + return *this; + } + + uint64_t WriterOptions::getRowIndexStride() const { + return privateBits->rowIndexStride; + } + + WriterOptions& WriterOptions::setDictionaryKeySizeThreshold(double val) { + privateBits->dictionaryKeySizeThreshold = val; + return *this; + } + + double WriterOptions::getDictionaryKeySizeThreshold() const { + return privateBits->dictionaryKeySizeThreshold; + } + + WriterOptions& WriterOptions::setFileVersion(const FileVersion& version) { + // Only Hive_0_11 and Hive_0_12 version are supported currently + if (version.getMajor() == 0 && (version.getMinor() == 11 || version.getMinor() == 12)) { + privateBits->fileVersion = version; + return *this; + } + throw std::logic_error("Unsupported file version specified."); + } + + FileVersion WriterOptions::getFileVersion() const { + return privateBits->fileVersion; + } + + WriterOptions& WriterOptions::setCompression(CompressionKind comp) { + privateBits->compression = comp; + return *this; + } + + CompressionKind WriterOptions::getCompression() const { + return privateBits->compression; + } + + WriterOptions& WriterOptions::setCompressionStrategy( + CompressionStrategy strategy) { + privateBits->compressionStrategy = strategy; + return *this; + } + + CompressionStrategy WriterOptions::getCompressionStrategy() const { + return privateBits->compressionStrategy; + } + + bool WriterOptions::getAlignedBitpacking() const { + return privateBits->compressionStrategy == CompressionStrategy ::CompressionStrategy_SPEED; + } + + WriterOptions& WriterOptions::setPaddingTolerance(double tolerance) { + privateBits->paddingTolerance = tolerance; + return *this; + } + + double WriterOptions::getPaddingTolerance() const { + return privateBits->paddingTolerance; + } + + WriterOptions& WriterOptions::setMemoryPool(MemoryPool* memoryPool) { + privateBits->memoryPool = memoryPool; + return *this; + } + + MemoryPool* WriterOptions::getMemoryPool() const { + return privateBits->memoryPool; + } + + WriterOptions& WriterOptions::setErrorStream(std::ostream& errStream) { + privateBits->errorStream = &errStream; + return *this; + } + + std::ostream* WriterOptions::getErrorStream() const { + return privateBits->errorStream; + } + + bool WriterOptions::getEnableIndex() const { + return privateBits->enableIndex; + } + + bool WriterOptions::getEnableDictionary() const { + return privateBits->dictionaryKeySizeThreshold > 0.0; + } + + WriterOptions& WriterOptions::setColumnsUseBloomFilter( + const std::set<uint64_t>& columns) { + privateBits->columnsUseBloomFilter = columns; + return *this; + } + + bool WriterOptions::isColumnUseBloomFilter(uint64_t column) const { + return privateBits->columnsUseBloomFilter.find(column) != + privateBits->columnsUseBloomFilter.end(); + } + + WriterOptions& WriterOptions::setBloomFilterFPP(double fpp) { + privateBits->bloomFilterFalsePositiveProb = fpp; + return *this; + } + + double 
WriterOptions::getBloomFilterFPP() const { + return privateBits->bloomFilterFalsePositiveProb; + } + + // delibrately not provide setter to write bloom filter version because + // we only support UTF8 for now. + BloomFilterVersion WriterOptions::getBloomFilterVersion() const { + return privateBits->bloomFilterVersion; + } + + Writer::~Writer() { + // PASS + } + + class WriterImpl : public Writer { + private: + std::unique_ptr<ColumnWriter> columnWriter; + std::unique_ptr<BufferedOutputStream> compressionStream; + std::unique_ptr<BufferedOutputStream> bufferedStream; + std::unique_ptr<StreamsFactory> streamsFactory; + OutputStream* outStream; + WriterOptions options; + const Type& type; + uint64_t stripeRows, totalRows, indexRows; + uint64_t currentOffset; + proto::Footer fileFooter; + proto::PostScript postScript; + proto::StripeInformation stripeInfo; + proto::Metadata metadata; + + static const char* magicId; + static const WriterId writerId; + + public: + WriterImpl( + const Type& type, + OutputStream* stream, + const WriterOptions& options); + + std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size) + const override; + + void add(ColumnVectorBatch& rowsToAdd) override; + + void close() override; + + void addUserMetadata(const std::string name, const std::string value) override; + + private: + void init(); + void initStripe(); + void writeStripe(); + void writeMetadata(); + void writeFileFooter(); + void writePostscript(); + void buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index); + static proto::CompressionKind convertCompressionKind( + const CompressionKind& kind); + }; + + const char * WriterImpl::magicId = "ORC"; + + const WriterId WriterImpl::writerId = WriterId::ORC_CPP_WRITER; + + WriterImpl::WriterImpl( + const Type& t, + OutputStream* stream, + const WriterOptions& opts) : + outStream(stream), + options(opts), + type(t) { + streamsFactory = createStreamsFactory(options, outStream); + columnWriter = buildWriter(type, *streamsFactory, options); + stripeRows = totalRows = indexRows = 0; + currentOffset = 0; + + // compression stream for stripe footer, file footer and metadata + compressionStream = createCompressor( + options.getCompression(), + outStream, + options.getCompressionStrategy(), + 1 * 1024 * 1024, // buffer capacity: 1M + options.getCompressionBlockSize(), + *options.getMemoryPool()); + + // uncompressed stream for post script + bufferedStream.reset(new BufferedOutputStream( + *options.getMemoryPool(), + outStream, + 1024, // buffer capacity: 1024 bytes + options.getCompressionBlockSize())); + + init(); + } + + std::unique_ptr<ColumnVectorBatch> WriterImpl::createRowBatch(uint64_t size) + const { + return type.createRowBatch(size, *options.getMemoryPool()); + } + + void WriterImpl::add(ColumnVectorBatch& rowsToAdd) { + if (options.getEnableIndex()) { + uint64_t pos = 0; + uint64_t chunkSize = 0; + uint64_t rowIndexStride = options.getRowIndexStride(); + while (pos < rowsToAdd.numElements) { + chunkSize = std::min(rowsToAdd.numElements - pos, + rowIndexStride - indexRows); + columnWriter->add(rowsToAdd, pos, chunkSize, nullptr); + + pos += chunkSize; + indexRows += chunkSize; + stripeRows += chunkSize; + + if (indexRows >= rowIndexStride) { + columnWriter->createRowIndexEntry(); + indexRows = 0; + } + } + } else { + stripeRows += rowsToAdd.numElements; + columnWriter->add(rowsToAdd, 0, rowsToAdd.numElements, nullptr); + } + + if (columnWriter->getEstimatedSize() >= options.getStripeSize()) { + writeStripe(); + } + } + + void 
WriterImpl::close() { + if (stripeRows > 0) { + writeStripe(); + } + writeMetadata(); + writeFileFooter(); + writePostscript(); + outStream->close(); + } + + void WriterImpl::addUserMetadata(const std::string name, const std::string value){ + proto::UserMetadataItem* userMetadataItem = fileFooter.add_metadata(); + userMetadataItem->set_name(TString(name)); + userMetadataItem->set_value(TString(value)); + } + + void WriterImpl::init() { + // Write file header + const static size_t magicIdLength = strlen(WriterImpl::magicId); + outStream->write(WriterImpl::magicId, magicIdLength); + currentOffset += magicIdLength; + + // Initialize file footer + fileFooter.set_headerlength(currentOffset); + fileFooter.set_contentlength(0); + fileFooter.set_numberofrows(0); + fileFooter.set_rowindexstride( + static_cast<uint32_t>(options.getRowIndexStride())); + fileFooter.set_writer(writerId); fileFooter.set_softwareversion(ORC_VERSION); - - uint32_t index = 0; - buildFooterType(type, fileFooter, index); - - // Initialize post script - postScript.set_footerlength(0); - postScript.set_compression( - WriterImpl::convertCompressionKind(options.getCompression())); - postScript.set_compressionblocksize(options.getCompressionBlockSize()); - - postScript.add_version(options.getFileVersion().getMajor()); - postScript.add_version(options.getFileVersion().getMinor()); - - postScript.set_writerversion(WriterVersion_ORC_135); - postScript.set_magic("ORC"); - - // Initialize first stripe - initStripe(); - } - - void WriterImpl::initStripe() { - stripeInfo.set_offset(currentOffset); - stripeInfo.set_indexlength(0); - stripeInfo.set_datalength(0); - stripeInfo.set_footerlength(0); - stripeInfo.set_numberofrows(0); - - stripeRows = indexRows = 0; - } - - void WriterImpl::writeStripe() { - if (options.getEnableIndex() && indexRows != 0) { - columnWriter->createRowIndexEntry(); - indexRows = 0; - } else { - columnWriter->mergeRowGroupStatsIntoStripeStats(); - } - - // dictionary should be written before any stream is flushed - columnWriter->writeDictionary(); - - std::vector<proto::Stream> streams; - // write ROW_INDEX streams - if (options.getEnableIndex()) { - columnWriter->writeIndex(streams); - } - // write streams like PRESENT, DATA, etc. 
- columnWriter->flush(streams); - - // generate and write stripe footer - proto::StripeFooter stripeFooter; - for (uint32_t i = 0; i < streams.size(); ++i) { - *stripeFooter.add_streams() = streams[i]; - } - - std::vector<proto::ColumnEncoding> encodings; - columnWriter->getColumnEncoding(encodings); - - for (uint32_t i = 0; i < encodings.size(); ++i) { - *stripeFooter.add_columns() = encodings[i]; - } - - // use GMT to guarantee TimestampVectorBatch from reader can write - // same wall clock time - stripeFooter.set_writertimezone("GMT"); - - // add stripe statistics to metadata - proto::StripeStatistics* stripeStats = metadata.add_stripestats(); - std::vector<proto::ColumnStatistics> colStats; - columnWriter->getStripeStatistics(colStats); - for (uint32_t i = 0; i != colStats.size(); ++i) { - *stripeStats->add_colstats() = colStats[i]; - } - // merge stripe stats into file stats and clear stripe stats - columnWriter->mergeStripeStatsIntoFileStats(); - - if (!stripeFooter.SerializeToZeroCopyStream(compressionStream.get())) { - throw std::logic_error("Failed to write stripe footer."); - } - uint64_t footerLength = compressionStream->flush(); - - // calculate data length and index length - uint64_t dataLength = 0; - uint64_t indexLength = 0; - for (uint32_t i = 0; i < streams.size(); ++i) { - if (streams[i].kind() == proto::Stream_Kind_ROW_INDEX || - streams[i].kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8) { - indexLength += streams[i].length(); - } else { - dataLength += streams[i].length(); - } - } - - // update stripe info - stripeInfo.set_indexlength(indexLength); - stripeInfo.set_datalength(dataLength); - stripeInfo.set_footerlength(footerLength); - stripeInfo.set_numberofrows(stripeRows); - - *fileFooter.add_stripes() = stripeInfo; - - currentOffset = currentOffset + indexLength + dataLength + footerLength; - totalRows += stripeRows; - - columnWriter->reset(); - - initStripe(); - } - - void WriterImpl::writeMetadata() { - if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) { - throw std::logic_error("Failed to write metadata."); - } - postScript.set_metadatalength(compressionStream.get()->flush()); - } - - void WriterImpl::writeFileFooter() { - fileFooter.set_contentlength(currentOffset - fileFooter.headerlength()); - fileFooter.set_numberofrows(totalRows); - - // update file statistics - std::vector<proto::ColumnStatistics> colStats; - columnWriter->getFileStatistics(colStats); - for (uint32_t i = 0; i != colStats.size(); ++i) { - *fileFooter.add_statistics() = colStats[i]; - } - - if (!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) { - throw std::logic_error("Failed to write file footer."); - } - postScript.set_footerlength(compressionStream->flush()); - } - - void WriterImpl::writePostscript() { - if (!postScript.SerializeToZeroCopyStream(bufferedStream.get())) { - throw std::logic_error("Failed to write post script."); - } - unsigned char psLength = - static_cast<unsigned char>(bufferedStream->flush()); - outStream->write(&psLength, sizeof(unsigned char)); - } - - void WriterImpl::buildFooterType( - const Type& t, - proto::Footer& footer, - uint32_t & index) { - proto::Type protoType; - protoType.set_maximumlength(static_cast<uint32_t>(t.getMaximumLength())); - protoType.set_precision(static_cast<uint32_t>(t.getPrecision())); - protoType.set_scale(static_cast<uint32_t>(t.getScale())); - - switch (t.getKind()) { - case BOOLEAN: { - protoType.set_kind(proto::Type_Kind_BOOLEAN); - break; - } - case BYTE: { - protoType.set_kind(proto::Type_Kind_BYTE); - 
break; - } - case SHORT: { - protoType.set_kind(proto::Type_Kind_SHORT); - break; - } - case INT: { - protoType.set_kind(proto::Type_Kind_INT); - break; - } - case LONG: { - protoType.set_kind(proto::Type_Kind_LONG); - break; - } - case FLOAT: { - protoType.set_kind(proto::Type_Kind_FLOAT); - break; - } - case DOUBLE: { - protoType.set_kind(proto::Type_Kind_DOUBLE); - break; - } - case STRING: { - protoType.set_kind(proto::Type_Kind_STRING); - break; - } - case BINARY: { - protoType.set_kind(proto::Type_Kind_BINARY); - break; - } - case TIMESTAMP: { - protoType.set_kind(proto::Type_Kind_TIMESTAMP); - break; - } - case LIST: { - protoType.set_kind(proto::Type_Kind_LIST); - break; - } - case MAP: { - protoType.set_kind(proto::Type_Kind_MAP); - break; - } - case STRUCT: { - protoType.set_kind(proto::Type_Kind_STRUCT); - break; - } - case UNION: { - protoType.set_kind(proto::Type_Kind_UNION); - break; - } - case DECIMAL: { - protoType.set_kind(proto::Type_Kind_DECIMAL); - break; - } - case DATE: { - protoType.set_kind(proto::Type_Kind_DATE); - break; - } - case VARCHAR: { - protoType.set_kind(proto::Type_Kind_VARCHAR); - break; - } - case CHAR: { - protoType.set_kind(proto::Type_Kind_CHAR); - break; - } - default: - throw std::logic_error("Unknown type."); - } - - int pos = static_cast<int>(index); - *footer.add_types() = protoType; - - for (uint64_t i = 0; i < t.getSubtypeCount(); ++i) { - // only add subtypes' field names if this type is STRUCT - if (t.getKind() == STRUCT) { - footer.mutable_types(pos)->add_fieldnames(TString(t.getFieldName(i))); - } - footer.mutable_types(pos)->add_subtypes(++index); - buildFooterType(*t.getSubtype(i), footer, index); - } - } - - proto::CompressionKind WriterImpl::convertCompressionKind( - const CompressionKind& kind) { - return static_cast<proto::CompressionKind>(kind); - } - - std::unique_ptr<Writer> createWriter( - const Type& type, - OutputStream* stream, - const WriterOptions& options) { - return std::unique_ptr<Writer>( - new WriterImpl( - type, - stream, - options)); - } - -} - + + uint32_t index = 0; + buildFooterType(type, fileFooter, index); + + // Initialize post script + postScript.set_footerlength(0); + postScript.set_compression( + WriterImpl::convertCompressionKind(options.getCompression())); + postScript.set_compressionblocksize(options.getCompressionBlockSize()); + + postScript.add_version(options.getFileVersion().getMajor()); + postScript.add_version(options.getFileVersion().getMinor()); + + postScript.set_writerversion(WriterVersion_ORC_135); + postScript.set_magic("ORC"); + + // Initialize first stripe + initStripe(); + } + + void WriterImpl::initStripe() { + stripeInfo.set_offset(currentOffset); + stripeInfo.set_indexlength(0); + stripeInfo.set_datalength(0); + stripeInfo.set_footerlength(0); + stripeInfo.set_numberofrows(0); + + stripeRows = indexRows = 0; + } + + void WriterImpl::writeStripe() { + if (options.getEnableIndex() && indexRows != 0) { + columnWriter->createRowIndexEntry(); + indexRows = 0; + } else { + columnWriter->mergeRowGroupStatsIntoStripeStats(); + } + + // dictionary should be written before any stream is flushed + columnWriter->writeDictionary(); + + std::vector<proto::Stream> streams; + // write ROW_INDEX streams + if (options.getEnableIndex()) { + columnWriter->writeIndex(streams); + } + // write streams like PRESENT, DATA, etc. 
+ columnWriter->flush(streams); + + // generate and write stripe footer + proto::StripeFooter stripeFooter; + for (uint32_t i = 0; i < streams.size(); ++i) { + *stripeFooter.add_streams() = streams[i]; + } + + std::vector<proto::ColumnEncoding> encodings; + columnWriter->getColumnEncoding(encodings); + + for (uint32_t i = 0; i < encodings.size(); ++i) { + *stripeFooter.add_columns() = encodings[i]; + } + + // use GMT to guarantee TimestampVectorBatch from reader can write + // same wall clock time + stripeFooter.set_writertimezone("GMT"); + + // add stripe statistics to metadata + proto::StripeStatistics* stripeStats = metadata.add_stripestats(); + std::vector<proto::ColumnStatistics> colStats; + columnWriter->getStripeStatistics(colStats); + for (uint32_t i = 0; i != colStats.size(); ++i) { + *stripeStats->add_colstats() = colStats[i]; + } + // merge stripe stats into file stats and clear stripe stats + columnWriter->mergeStripeStatsIntoFileStats(); + + if (!stripeFooter.SerializeToZeroCopyStream(compressionStream.get())) { + throw std::logic_error("Failed to write stripe footer."); + } + uint64_t footerLength = compressionStream->flush(); + + // calculate data length and index length + uint64_t dataLength = 0; + uint64_t indexLength = 0; + for (uint32_t i = 0; i < streams.size(); ++i) { + if (streams[i].kind() == proto::Stream_Kind_ROW_INDEX || + streams[i].kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8) { + indexLength += streams[i].length(); + } else { + dataLength += streams[i].length(); + } + } + + // update stripe info + stripeInfo.set_indexlength(indexLength); + stripeInfo.set_datalength(dataLength); + stripeInfo.set_footerlength(footerLength); + stripeInfo.set_numberofrows(stripeRows); + + *fileFooter.add_stripes() = stripeInfo; + + currentOffset = currentOffset + indexLength + dataLength + footerLength; + totalRows += stripeRows; + + columnWriter->reset(); + + initStripe(); + } + + void WriterImpl::writeMetadata() { + if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) { + throw std::logic_error("Failed to write metadata."); + } + postScript.set_metadatalength(compressionStream.get()->flush()); + } + + void WriterImpl::writeFileFooter() { + fileFooter.set_contentlength(currentOffset - fileFooter.headerlength()); + fileFooter.set_numberofrows(totalRows); + + // update file statistics + std::vector<proto::ColumnStatistics> colStats; + columnWriter->getFileStatistics(colStats); + for (uint32_t i = 0; i != colStats.size(); ++i) { + *fileFooter.add_statistics() = colStats[i]; + } + + if (!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) { + throw std::logic_error("Failed to write file footer."); + } + postScript.set_footerlength(compressionStream->flush()); + } + + void WriterImpl::writePostscript() { + if (!postScript.SerializeToZeroCopyStream(bufferedStream.get())) { + throw std::logic_error("Failed to write post script."); + } + unsigned char psLength = + static_cast<unsigned char>(bufferedStream->flush()); + outStream->write(&psLength, sizeof(unsigned char)); + } + + void WriterImpl::buildFooterType( + const Type& t, + proto::Footer& footer, + uint32_t & index) { + proto::Type protoType; + protoType.set_maximumlength(static_cast<uint32_t>(t.getMaximumLength())); + protoType.set_precision(static_cast<uint32_t>(t.getPrecision())); + protoType.set_scale(static_cast<uint32_t>(t.getScale())); + + switch (t.getKind()) { + case BOOLEAN: { + protoType.set_kind(proto::Type_Kind_BOOLEAN); + break; + } + case BYTE: { + protoType.set_kind(proto::Type_Kind_BYTE); + 
break; + } + case SHORT: { + protoType.set_kind(proto::Type_Kind_SHORT); + break; + } + case INT: { + protoType.set_kind(proto::Type_Kind_INT); + break; + } + case LONG: { + protoType.set_kind(proto::Type_Kind_LONG); + break; + } + case FLOAT: { + protoType.set_kind(proto::Type_Kind_FLOAT); + break; + } + case DOUBLE: { + protoType.set_kind(proto::Type_Kind_DOUBLE); + break; + } + case STRING: { + protoType.set_kind(proto::Type_Kind_STRING); + break; + } + case BINARY: { + protoType.set_kind(proto::Type_Kind_BINARY); + break; + } + case TIMESTAMP: { + protoType.set_kind(proto::Type_Kind_TIMESTAMP); + break; + } + case LIST: { + protoType.set_kind(proto::Type_Kind_LIST); + break; + } + case MAP: { + protoType.set_kind(proto::Type_Kind_MAP); + break; + } + case STRUCT: { + protoType.set_kind(proto::Type_Kind_STRUCT); + break; + } + case UNION: { + protoType.set_kind(proto::Type_Kind_UNION); + break; + } + case DECIMAL: { + protoType.set_kind(proto::Type_Kind_DECIMAL); + break; + } + case DATE: { + protoType.set_kind(proto::Type_Kind_DATE); + break; + } + case VARCHAR: { + protoType.set_kind(proto::Type_Kind_VARCHAR); + break; + } + case CHAR: { + protoType.set_kind(proto::Type_Kind_CHAR); + break; + } + default: + throw std::logic_error("Unknown type."); + } + + int pos = static_cast<int>(index); + *footer.add_types() = protoType; + + for (uint64_t i = 0; i < t.getSubtypeCount(); ++i) { + // only add subtypes' field names if this type is STRUCT + if (t.getKind() == STRUCT) { + footer.mutable_types(pos)->add_fieldnames(TString(t.getFieldName(i))); + } + footer.mutable_types(pos)->add_subtypes(++index); + buildFooterType(*t.getSubtype(i), footer, index); + } + } + + proto::CompressionKind WriterImpl::convertCompressionKind( + const CompressionKind& kind) { + return static_cast<proto::CompressionKind>(kind); + } + + std::unique_ptr<Writer> createWriter( + const Type& type, + OutputStream* stream, + const WriterOptions& options) { + return std::unique_ptr<Writer>( + new WriterImpl( + type, + stream, + options)); + } + +} + diff --git a/contrib/libs/apache/orc/c++/src/io/InputStream.cc b/contrib/libs/apache/orc/c++/src/io/InputStream.cc index 201f6f9c1d..6e54b1412f 100644 --- a/contrib/libs/apache/orc/c++/src/io/InputStream.cc +++ b/contrib/libs/apache/orc/c++/src/io/InputStream.cc @@ -1,222 +1,222 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "orc/Exceptions.hh" -#include "InputStream.hh" - -#include <algorithm> -#include <iomanip> - -namespace orc { - - void printBuffer(std::ostream& out, - const char *buffer, - uint64_t length) { - const uint64_t width = 24; - out << std::hex; - for(uint64_t line = 0; line < (length + width - 1) / width; ++line) { - out << std::setfill('0') << std::setw(7) << (line * width); - for(uint64_t byte = 0; - byte < width && line * width + byte < length; ++byte) { - out << " " << std::setfill('0') << std::setw(2) - << static_cast<uint64_t>(0xff & buffer[line * width + - byte]); - } - out << "\n"; - } - out << std::dec; - } - - PositionProvider::PositionProvider(const std::list<uint64_t>& posns) { - position = posns.begin(); - } - - uint64_t PositionProvider::next() { - uint64_t result = *position; - ++position; - return result; - } - - SeekableInputStream::~SeekableInputStream() { - // PASS - } - - SeekableArrayInputStream::~SeekableArrayInputStream() { - // PASS - } - - SeekableArrayInputStream::SeekableArrayInputStream - (const unsigned char* values, - uint64_t size, - uint64_t blkSize - ): data(reinterpret_cast<const char*>(values)) { - length = size; - position = 0; - blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize); - } - - SeekableArrayInputStream::SeekableArrayInputStream(const char* values, - uint64_t size, - uint64_t blkSize - ): data(values) { - length = size; - position = 0; - blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize); - } - - bool SeekableArrayInputStream::Next(const void** buffer, int*size) { - uint64_t currentSize = std::min(length - position, blockSize); - if (currentSize > 0) { - *buffer = data + position; - *size = static_cast<int>(currentSize); - position += currentSize; - return true; - } - *size = 0; - return false; - } - - void SeekableArrayInputStream::BackUp(int count) { - if (count >= 0) { - uint64_t unsignedCount = static_cast<uint64_t>(count); - if (unsignedCount <= blockSize && unsignedCount <= position) { - position -= unsignedCount; - } else { - throw std::logic_error("Can't backup that much!"); - } - } - } - - bool SeekableArrayInputStream::Skip(int count) { - if (count >= 0) { - uint64_t unsignedCount = static_cast<uint64_t>(count); - if (unsignedCount + position <= length) { - position += unsignedCount; - return true; - } else { - position = length; - } - } - return false; - } - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Exceptions.hh" +#include "InputStream.hh" + +#include <algorithm> +#include <iomanip> + +namespace orc { + + void printBuffer(std::ostream& out, + const char *buffer, + uint64_t length) { + const uint64_t width = 24; + out << std::hex; + for(uint64_t line = 0; line < (length + width - 1) / width; ++line) { + out << std::setfill('0') << std::setw(7) << (line * width); + for(uint64_t byte = 0; + byte < width && line * width + byte < length; ++byte) { + out << " " << std::setfill('0') << std::setw(2) + << static_cast<uint64_t>(0xff & buffer[line * width + + byte]); + } + out << "\n"; + } + out << std::dec; + } + + PositionProvider::PositionProvider(const std::list<uint64_t>& posns) { + position = posns.begin(); + } + + uint64_t PositionProvider::next() { + uint64_t result = *position; + ++position; + return result; + } + + SeekableInputStream::~SeekableInputStream() { + // PASS + } + + SeekableArrayInputStream::~SeekableArrayInputStream() { + // PASS + } + + SeekableArrayInputStream::SeekableArrayInputStream + (const unsigned char* values, + uint64_t size, + uint64_t blkSize + ): data(reinterpret_cast<const char*>(values)) { + length = size; + position = 0; + blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize); + } + + SeekableArrayInputStream::SeekableArrayInputStream(const char* values, + uint64_t size, + uint64_t blkSize + ): data(values) { + length = size; + position = 0; + blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize); + } + + bool SeekableArrayInputStream::Next(const void** buffer, int*size) { + uint64_t currentSize = std::min(length - position, blockSize); + if (currentSize > 0) { + *buffer = data + position; + *size = static_cast<int>(currentSize); + position += currentSize; + return true; + } + *size = 0; + return false; + } + + void SeekableArrayInputStream::BackUp(int count) { + if (count >= 0) { + uint64_t unsignedCount = static_cast<uint64_t>(count); + if (unsignedCount <= blockSize && unsignedCount <= position) { + position -= unsignedCount; + } else { + throw std::logic_error("Can't backup that much!"); + } + } + } + + bool SeekableArrayInputStream::Skip(int count) { + if (count >= 0) { + uint64_t unsignedCount = static_cast<uint64_t>(count); + if (unsignedCount + position <= length) { + position += unsignedCount; + return true; + } else { + position = length; + } + } + return false; + } + int64_t SeekableArrayInputStream::ByteCount() const { - return static_cast<google::protobuf::int64>(position); - } - - void SeekableArrayInputStream::seek(PositionProvider& seekPosition) { - position = seekPosition.next(); - } - - std::string SeekableArrayInputStream::getName() const { - std::ostringstream result; - result << "SeekableArrayInputStream " << position << " of " << length; - return result.str(); - } - - static uint64_t computeBlock(uint64_t request, uint64_t length) { - return std::min(length, request == 0 ? 
256 * 1024 : request); - } - - SeekableFileInputStream::SeekableFileInputStream(InputStream* stream, - uint64_t offset, - uint64_t byteCount, - MemoryPool& _pool, - uint64_t _blockSize - ):pool(_pool), - input(stream), - start(offset), - length(byteCount), - blockSize(computeBlock - (_blockSize, - length)) { - - position = 0; - buffer.reset(new DataBuffer<char>(pool)); - pushBack = 0; - } - - SeekableFileInputStream::~SeekableFileInputStream() { - // PASS - } - - bool SeekableFileInputStream::Next(const void** data, int*size) { - uint64_t bytesRead; - if (pushBack != 0) { - *data = buffer->data() + (buffer->size() - pushBack); - bytesRead = pushBack; - } else { - bytesRead = std::min(length - position, blockSize); - buffer->resize(bytesRead); - if (bytesRead > 0) { - input->read(buffer->data(), bytesRead, start+position); - *data = static_cast<void*>(buffer->data()); - } - } - position += bytesRead; - pushBack = 0; - *size = static_cast<int>(bytesRead); - return bytesRead != 0; - } - - void SeekableFileInputStream::BackUp(int signedCount) { - if (signedCount < 0) { - throw std::logic_error("can't backup negative distances"); - } - uint64_t count = static_cast<uint64_t>(signedCount); - if (pushBack > 0) { - throw std::logic_error("can't backup unless we just called Next"); - } - if (count > blockSize || count > position) { - throw std::logic_error("can't backup that far"); - } - pushBack = static_cast<uint64_t>(count); - position -= pushBack; - } - - bool SeekableFileInputStream::Skip(int signedCount) { - if (signedCount < 0) { - return false; - } - uint64_t count = static_cast<uint64_t>(signedCount); - position = std::min(position + count, length); - pushBack = 0; - return position < length; - } - - int64_t SeekableFileInputStream::ByteCount() const { - return static_cast<int64_t>(position); - } - - void SeekableFileInputStream::seek(PositionProvider& location) { - position = location.next(); - if (position > length) { - position = length; - throw std::logic_error("seek too far"); - } - pushBack = 0; - } - - std::string SeekableFileInputStream::getName() const { - std::ostringstream result; - result << input->getName() << " from " << start << " for " - << length; - return result.str(); - } - -} + return static_cast<google::protobuf::int64>(position); + } + + void SeekableArrayInputStream::seek(PositionProvider& seekPosition) { + position = seekPosition.next(); + } + + std::string SeekableArrayInputStream::getName() const { + std::ostringstream result; + result << "SeekableArrayInputStream " << position << " of " << length; + return result.str(); + } + + static uint64_t computeBlock(uint64_t request, uint64_t length) { + return std::min(length, request == 0 ? 
256 * 1024 : request); + } + + SeekableFileInputStream::SeekableFileInputStream(InputStream* stream, + uint64_t offset, + uint64_t byteCount, + MemoryPool& _pool, + uint64_t _blockSize + ):pool(_pool), + input(stream), + start(offset), + length(byteCount), + blockSize(computeBlock + (_blockSize, + length)) { + + position = 0; + buffer.reset(new DataBuffer<char>(pool)); + pushBack = 0; + } + + SeekableFileInputStream::~SeekableFileInputStream() { + // PASS + } + + bool SeekableFileInputStream::Next(const void** data, int*size) { + uint64_t bytesRead; + if (pushBack != 0) { + *data = buffer->data() + (buffer->size() - pushBack); + bytesRead = pushBack; + } else { + bytesRead = std::min(length - position, blockSize); + buffer->resize(bytesRead); + if (bytesRead > 0) { + input->read(buffer->data(), bytesRead, start+position); + *data = static_cast<void*>(buffer->data()); + } + } + position += bytesRead; + pushBack = 0; + *size = static_cast<int>(bytesRead); + return bytesRead != 0; + } + + void SeekableFileInputStream::BackUp(int signedCount) { + if (signedCount < 0) { + throw std::logic_error("can't backup negative distances"); + } + uint64_t count = static_cast<uint64_t>(signedCount); + if (pushBack > 0) { + throw std::logic_error("can't backup unless we just called Next"); + } + if (count > blockSize || count > position) { + throw std::logic_error("can't backup that far"); + } + pushBack = static_cast<uint64_t>(count); + position -= pushBack; + } + + bool SeekableFileInputStream::Skip(int signedCount) { + if (signedCount < 0) { + return false; + } + uint64_t count = static_cast<uint64_t>(signedCount); + position = std::min(position + count, length); + pushBack = 0; + return position < length; + } + + int64_t SeekableFileInputStream::ByteCount() const { + return static_cast<int64_t>(position); + } + + void SeekableFileInputStream::seek(PositionProvider& location) { + position = location.next(); + if (position > length) { + position = length; + throw std::logic_error("seek too far"); + } + pushBack = 0; + } + + std::string SeekableFileInputStream::getName() const { + std::ostringstream result; + result << input->getName() << " from " << start << " for " + << length; + return result.str(); + } + +} diff --git a/contrib/libs/apache/orc/c++/src/io/InputStream.hh b/contrib/libs/apache/orc/c++/src/io/InputStream.hh index 797049a300..d8bd3d4d8c 100644 --- a/contrib/libs/apache/orc/c++/src/io/InputStream.hh +++ b/contrib/libs/apache/orc/c++/src/io/InputStream.hh @@ -1,116 +1,116 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_INPUTSTREAM_HH -#define ORC_INPUTSTREAM_HH - -#include "Adaptor.hh" -#include "orc/OrcFile.hh" -#include "wrap/zero-copy-stream-wrapper.h" - -#include <list> -#include <fstream> -#include <iostream> -#include <sstream> -#include <vector> - -namespace orc { - - void printBuffer(std::ostream& out, - const char *buffer, - uint64_t length); - - class PositionProvider { - private: - std::list<uint64_t>::const_iterator position; - public: - PositionProvider(const std::list<uint64_t>& positions); - uint64_t next(); - }; - - /** - * A subclass of Google's ZeroCopyInputStream that supports seek. - * By extending Google's class, we get the ability to pass it directly - * to the protobuf readers. - */ - class SeekableInputStream: public google::protobuf::io::ZeroCopyInputStream { - public: - virtual ~SeekableInputStream(); - virtual void seek(PositionProvider& position) = 0; - virtual std::string getName() const = 0; - }; - - /** - * Create a seekable input stream based on a memory range. - */ - class SeekableArrayInputStream: public SeekableInputStream { - private: - const char* data; - uint64_t length; - uint64_t position; - uint64_t blockSize; - - public: - SeekableArrayInputStream(const unsigned char* list, - uint64_t length, - uint64_t block_size = 0); - SeekableArrayInputStream(const char* list, - uint64_t length, - uint64_t block_size = 0); - virtual ~SeekableArrayInputStream() override; - virtual bool Next(const void** data, int*size) override; - virtual void BackUp(int count) override; - virtual bool Skip(int count) override; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_INPUTSTREAM_HH +#define ORC_INPUTSTREAM_HH + +#include "Adaptor.hh" +#include "orc/OrcFile.hh" +#include "wrap/zero-copy-stream-wrapper.h" + +#include <list> +#include <fstream> +#include <iostream> +#include <sstream> +#include <vector> + +namespace orc { + + void printBuffer(std::ostream& out, + const char *buffer, + uint64_t length); + + class PositionProvider { + private: + std::list<uint64_t>::const_iterator position; + public: + PositionProvider(const std::list<uint64_t>& positions); + uint64_t next(); + }; + + /** + * A subclass of Google's ZeroCopyInputStream that supports seek. + * By extending Google's class, we get the ability to pass it directly + * to the protobuf readers. + */ + class SeekableInputStream: public google::protobuf::io::ZeroCopyInputStream { + public: + virtual ~SeekableInputStream(); + virtual void seek(PositionProvider& position) = 0; + virtual std::string getName() const = 0; + }; + + /** + * Create a seekable input stream based on a memory range. 
+ */ + class SeekableArrayInputStream: public SeekableInputStream { + private: + const char* data; + uint64_t length; + uint64_t position; + uint64_t blockSize; + + public: + SeekableArrayInputStream(const unsigned char* list, + uint64_t length, + uint64_t block_size = 0); + SeekableArrayInputStream(const char* list, + uint64_t length, + uint64_t block_size = 0); + virtual ~SeekableArrayInputStream() override; + virtual bool Next(const void** data, int*size) override; + virtual void BackUp(int count) override; + virtual bool Skip(int count) override; virtual int64_t ByteCount() const override; - virtual void seek(PositionProvider& position) override; - virtual std::string getName() const override; - }; - - /** - * Create a seekable input stream based on an input stream. - */ - class SeekableFileInputStream: public SeekableInputStream { - private: - MemoryPool& pool; - InputStream* const input; - const uint64_t start; - const uint64_t length; - const uint64_t blockSize; - std::unique_ptr<DataBuffer<char> > buffer; - uint64_t position; - uint64_t pushBack; - - public: - SeekableFileInputStream(InputStream* input, - uint64_t offset, - uint64_t byteCount, - MemoryPool& pool, - uint64_t blockSize = 0); - virtual ~SeekableFileInputStream() override; - - virtual bool Next(const void** data, int*size) override; - virtual void BackUp(int count) override; - virtual bool Skip(int count) override; - virtual int64_t ByteCount() const override; - virtual void seek(PositionProvider& position) override; - virtual std::string getName() const override; - }; - -} - -#endif //ORC_INPUTSTREAM_HH + virtual void seek(PositionProvider& position) override; + virtual std::string getName() const override; + }; + + /** + * Create a seekable input stream based on an input stream. + */ + class SeekableFileInputStream: public SeekableInputStream { + private: + MemoryPool& pool; + InputStream* const input; + const uint64_t start; + const uint64_t length; + const uint64_t blockSize; + std::unique_ptr<DataBuffer<char> > buffer; + uint64_t position; + uint64_t pushBack; + + public: + SeekableFileInputStream(InputStream* input, + uint64_t offset, + uint64_t byteCount, + MemoryPool& pool, + uint64_t blockSize = 0); + virtual ~SeekableFileInputStream() override; + + virtual bool Next(const void** data, int*size) override; + virtual void BackUp(int count) override; + virtual bool Skip(int count) override; + virtual int64_t ByteCount() const override; + virtual void seek(PositionProvider& position) override; + virtual std::string getName() const override; + }; + +} + +#endif //ORC_INPUTSTREAM_HH diff --git a/contrib/libs/apache/orc/c++/src/io/OutputStream.cc b/contrib/libs/apache/orc/c++/src/io/OutputStream.cc index dd9327adf9..11a21c0bd3 100644 --- a/contrib/libs/apache/orc/c++/src/io/OutputStream.cc +++ b/contrib/libs/apache/orc/c++/src/io/OutputStream.cc @@ -1,147 +1,147 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "orc/Exceptions.hh" -#include "OutputStream.hh" - -#include <sstream> - -namespace orc { - - PositionRecorder::~PositionRecorder() { - // PASS - } - - BufferedOutputStream::BufferedOutputStream( - MemoryPool& pool, - OutputStream * outStream, - uint64_t capacity_, - uint64_t blockSize_) - : outputStream(outStream), - blockSize(blockSize_) { - dataBuffer.reset(new DataBuffer<char>(pool)); - dataBuffer->reserve(capacity_); - } - - BufferedOutputStream::~BufferedOutputStream() { - // PASS - } - - bool BufferedOutputStream::Next(void** buffer, int* size) { - *size = static_cast<int>(blockSize); - uint64_t oldSize = dataBuffer->size(); - uint64_t newSize = oldSize + blockSize; - uint64_t newCapacity = dataBuffer->capacity(); - while (newCapacity < newSize) { - newCapacity += dataBuffer->capacity(); - } - dataBuffer->reserve(newCapacity); - dataBuffer->resize(newSize); - *buffer = dataBuffer->data() + oldSize; - return true; - } - - void BufferedOutputStream::BackUp(int count) { - if (count >= 0) { - uint64_t unsignedCount = static_cast<uint64_t>(count); - if (unsignedCount <= dataBuffer->size()) { - dataBuffer->resize(dataBuffer->size() - unsignedCount); - } else { - throw std::logic_error("Can't backup that much!"); - } - } - } - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "orc/Exceptions.hh" +#include "OutputStream.hh" + +#include <sstream> + +namespace orc { + + PositionRecorder::~PositionRecorder() { + // PASS + } + + BufferedOutputStream::BufferedOutputStream( + MemoryPool& pool, + OutputStream * outStream, + uint64_t capacity_, + uint64_t blockSize_) + : outputStream(outStream), + blockSize(blockSize_) { + dataBuffer.reset(new DataBuffer<char>(pool)); + dataBuffer->reserve(capacity_); + } + + BufferedOutputStream::~BufferedOutputStream() { + // PASS + } + + bool BufferedOutputStream::Next(void** buffer, int* size) { + *size = static_cast<int>(blockSize); + uint64_t oldSize = dataBuffer->size(); + uint64_t newSize = oldSize + blockSize; + uint64_t newCapacity = dataBuffer->capacity(); + while (newCapacity < newSize) { + newCapacity += dataBuffer->capacity(); + } + dataBuffer->reserve(newCapacity); + dataBuffer->resize(newSize); + *buffer = dataBuffer->data() + oldSize; + return true; + } + + void BufferedOutputStream::BackUp(int count) { + if (count >= 0) { + uint64_t unsignedCount = static_cast<uint64_t>(count); + if (unsignedCount <= dataBuffer->size()) { + dataBuffer->resize(dataBuffer->size() - unsignedCount); + } else { + throw std::logic_error("Can't backup that much!"); + } + } + } + int64_t BufferedOutputStream::ByteCount() const { - return static_cast<google::protobuf::int64>(dataBuffer->size()); - } - - bool BufferedOutputStream::WriteAliasedRaw(const void *, int) { - throw NotImplementedYet("WriteAliasedRaw is not supported."); - } - - bool BufferedOutputStream::AllowsAliasing() const { - return false; - } - - std::string BufferedOutputStream::getName() const { - std::ostringstream result; - result << "BufferedOutputStream " << dataBuffer->size() << " of " - << dataBuffer->capacity(); - return result.str(); - } - - uint64_t BufferedOutputStream::getSize() const { - return dataBuffer->size(); - } - - uint64_t BufferedOutputStream::flush() { - uint64_t dataSize = dataBuffer->size(); - outputStream->write(dataBuffer->data(), dataSize); - dataBuffer->resize(0); - return dataSize; - } - - void AppendOnlyBufferedStream::write(const char * data, size_t size) { - size_t dataOffset = 0; - while (size > 0) { - if (bufferOffset == bufferLength) { - if (!outStream->Next( - reinterpret_cast<void **>(&buffer), - &bufferLength)) { - throw std::logic_error("Failed to allocate buffer."); - } - bufferOffset = 0; - } - size_t len = std::min( - static_cast<size_t>(bufferLength - bufferOffset), - size); - memcpy(buffer + bufferOffset, data + dataOffset, len); - bufferOffset += static_cast<int>(len); - dataOffset += len; - size -= len; - } - } - - uint64_t AppendOnlyBufferedStream::getSize() const { - return outStream->getSize(); - } - - uint64_t AppendOnlyBufferedStream::flush() { - outStream->BackUp(bufferLength - bufferOffset); - bufferOffset = bufferLength = 0; - buffer = nullptr; - return outStream->flush(); - } - - void AppendOnlyBufferedStream::recordPosition(PositionRecorder* recorder) const { - uint64_t flushedSize = outStream->getSize(); - uint64_t unflushedSize = static_cast<uint64_t>(bufferOffset); - if (outStream->isCompressed()) { - // start of the compression chunk in the stream - recorder->add(flushedSize); - // number of decompressed bytes that need to be consumed - recorder->add(unflushedSize); - } else { - flushedSize -= static_cast<uint64_t>(bufferLength); - // byte offset of the start location - recorder->add(flushedSize + unflushedSize); - } - } - -} + return static_cast<google::protobuf::int64>(dataBuffer->size()); + } + + 
bool BufferedOutputStream::WriteAliasedRaw(const void *, int) { + throw NotImplementedYet("WriteAliasedRaw is not supported."); + } + + bool BufferedOutputStream::AllowsAliasing() const { + return false; + } + + std::string BufferedOutputStream::getName() const { + std::ostringstream result; + result << "BufferedOutputStream " << dataBuffer->size() << " of " + << dataBuffer->capacity(); + return result.str(); + } + + uint64_t BufferedOutputStream::getSize() const { + return dataBuffer->size(); + } + + uint64_t BufferedOutputStream::flush() { + uint64_t dataSize = dataBuffer->size(); + outputStream->write(dataBuffer->data(), dataSize); + dataBuffer->resize(0); + return dataSize; + } + + void AppendOnlyBufferedStream::write(const char * data, size_t size) { + size_t dataOffset = 0; + while (size > 0) { + if (bufferOffset == bufferLength) { + if (!outStream->Next( + reinterpret_cast<void **>(&buffer), + &bufferLength)) { + throw std::logic_error("Failed to allocate buffer."); + } + bufferOffset = 0; + } + size_t len = std::min( + static_cast<size_t>(bufferLength - bufferOffset), + size); + memcpy(buffer + bufferOffset, data + dataOffset, len); + bufferOffset += static_cast<int>(len); + dataOffset += len; + size -= len; + } + } + + uint64_t AppendOnlyBufferedStream::getSize() const { + return outStream->getSize(); + } + + uint64_t AppendOnlyBufferedStream::flush() { + outStream->BackUp(bufferLength - bufferOffset); + bufferOffset = bufferLength = 0; + buffer = nullptr; + return outStream->flush(); + } + + void AppendOnlyBufferedStream::recordPosition(PositionRecorder* recorder) const { + uint64_t flushedSize = outStream->getSize(); + uint64_t unflushedSize = static_cast<uint64_t>(bufferOffset); + if (outStream->isCompressed()) { + // start of the compression chunk in the stream + recorder->add(flushedSize); + // number of decompressed bytes that need to be consumed + recorder->add(unflushedSize); + } else { + flushedSize -= static_cast<uint64_t>(bufferLength); + // byte offset of the start location + recorder->add(flushedSize + unflushedSize); + } + } + +} diff --git a/contrib/libs/apache/orc/c++/src/io/OutputStream.hh b/contrib/libs/apache/orc/c++/src/io/OutputStream.hh index e40263fdfb..7ce9fafa24 100644 --- a/contrib/libs/apache/orc/c++/src/io/OutputStream.hh +++ b/contrib/libs/apache/orc/c++/src/io/OutputStream.hh @@ -1,96 +1,96 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ORC_OUTPUTSTREAM_HH -#define ORC_OUTPUTSTREAM_HH - -#include "Adaptor.hh" -#include "orc/OrcFile.hh" -#include "wrap/zero-copy-stream-wrapper.h" - -namespace orc { - - /** - * Record write position for creating index stream - */ - class PositionRecorder { - public: - virtual ~PositionRecorder(); - virtual void add(uint64_t pos) = 0; - }; - - /** - * A subclass of Google's ZeroCopyOutputStream that supports output to memory - * buffer, and flushing to OutputStream. - * By extending Google's class, we get the ability to pass it directly - * to the protobuf writers. - */ - class BufferedOutputStream: public google::protobuf::io::ZeroCopyOutputStream { - private: - OutputStream * outputStream; - std::unique_ptr<DataBuffer<char> > dataBuffer; - uint64_t blockSize; - - public: - BufferedOutputStream(MemoryPool& pool, - OutputStream * outStream, - uint64_t capacity, - uint64_t block_size); - virtual ~BufferedOutputStream() override; - - virtual bool Next(void** data, int*size) override; - virtual void BackUp(int count) override; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_OUTPUTSTREAM_HH +#define ORC_OUTPUTSTREAM_HH + +#include "Adaptor.hh" +#include "orc/OrcFile.hh" +#include "wrap/zero-copy-stream-wrapper.h" + +namespace orc { + + /** + * Record write position for creating index stream + */ + class PositionRecorder { + public: + virtual ~PositionRecorder(); + virtual void add(uint64_t pos) = 0; + }; + + /** + * A subclass of Google's ZeroCopyOutputStream that supports output to memory + * buffer, and flushing to OutputStream. + * By extending Google's class, we get the ability to pass it directly + * to the protobuf writers. + */ + class BufferedOutputStream: public google::protobuf::io::ZeroCopyOutputStream { + private: + OutputStream * outputStream; + std::unique_ptr<DataBuffer<char> > dataBuffer; + uint64_t blockSize; + + public: + BufferedOutputStream(MemoryPool& pool, + OutputStream * outStream, + uint64_t capacity, + uint64_t block_size); + virtual ~BufferedOutputStream() override; + + virtual bool Next(void** data, int*size) override; + virtual void BackUp(int count) override; virtual int64_t ByteCount() const override; - virtual bool WriteAliasedRaw(const void * data, int size) override; - virtual bool AllowsAliasing() const override; - - virtual std::string getName() const; - virtual uint64_t getSize() const; - virtual uint64_t flush(); - - virtual bool isCompressed() const { return false; } - }; - - /** - * An append only buffered stream that allows - * buffer, and flushing to OutputStream. - * By extending Google's class, we get the ability to pass it directly - * to the protobuf writers. 
- */ - class AppendOnlyBufferedStream { - private: - std::unique_ptr<BufferedOutputStream> outStream; - char * buffer; - int bufferOffset, bufferLength; - - public: - AppendOnlyBufferedStream(std::unique_ptr<BufferedOutputStream> _outStream) : - outStream(std::move(_outStream)) { - buffer = nullptr; - bufferOffset = bufferLength = 0; - } - - void write(const char * data, size_t size); - uint64_t getSize() const; - uint64_t flush(); - - void recordPosition(PositionRecorder* recorder) const; - }; -} - -#endif // ORC_OUTPUTSTREAM_HH + virtual bool WriteAliasedRaw(const void * data, int size) override; + virtual bool AllowsAliasing() const override; + + virtual std::string getName() const; + virtual uint64_t getSize() const; + virtual uint64_t flush(); + + virtual bool isCompressed() const { return false; } + }; + + /** + * An append only buffered stream that allows + * buffer, and flushing to OutputStream. + * By extending Google's class, we get the ability to pass it directly + * to the protobuf writers. + */ + class AppendOnlyBufferedStream { + private: + std::unique_ptr<BufferedOutputStream> outStream; + char * buffer; + int bufferOffset, bufferLength; + + public: + AppendOnlyBufferedStream(std::unique_ptr<BufferedOutputStream> _outStream) : + outStream(std::move(_outStream)) { + buffer = nullptr; + bufferOffset = bufferLength = 0; + } + + void write(const char * data, size_t size); + uint64_t getSize() const; + uint64_t flush(); + + void recordPosition(PositionRecorder* recorder) const; + }; +} + +#endif // ORC_OUTPUTSTREAM_HH diff --git a/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h index 8d1eab50b4..605fbf826c 100644 --- a/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h +++ b/contrib/libs/apache/orc/c++/src/wrap/coded-stream-wrapper.h @@ -1,35 +1,35 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef CODED_STREAM_WRAPPER_HH -#define CODED_STREAM_WRAPPER_HH - -#include "Adaptor.hh" - -DIAGNOSTIC_PUSH - -#ifdef __clang__ - DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") - DIAGNOSTIC_IGNORE("-Wreserved-id-macro") -#endif - -#if defined(__GNUC__) || defined(__clang__) - DIAGNOSTIC_IGNORE("-Wconversion") -#endif - +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CODED_STREAM_WRAPPER_HH +#define CODED_STREAM_WRAPPER_HH + +#include "Adaptor.hh" + +DIAGNOSTIC_PUSH + +#ifdef __clang__ + DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") + DIAGNOSTIC_IGNORE("-Wreserved-id-macro") +#endif + +#if defined(__GNUC__) || defined(__clang__) + DIAGNOSTIC_IGNORE("-Wconversion") +#endif + #include <google/protobuf/io/coded_stream.h> - -DIAGNOSTIC_POP - -#endif + +DIAGNOSTIC_POP + +#endif diff --git a/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh b/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh index dc8e9de7f6..5c161660cc 100644 --- a/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh +++ b/contrib/libs/apache/orc/c++/src/wrap/orc-proto-wrapper.hh @@ -1,47 +1,47 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_PROTO_WRAPPER_HH -#define ORC_PROTO_WRAPPER_HH - -#include "Adaptor.hh" - -DIAGNOSTIC_PUSH - -#if defined(__GNUC__) || defined(__clang__) - DIAGNOSTIC_IGNORE("-Wconversion") - DIAGNOSTIC_IGNORE("-Wdeprecated") - DIAGNOSTIC_IGNORE("-Wsign-conversion") - DIAGNOSTIC_IGNORE("-Wunused-parameter") -#endif - -#ifdef __clang__ - DIAGNOSTIC_IGNORE("-Wnested-anon-types") - DIAGNOSTIC_IGNORE("-Wreserved-id-macro") - DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") - DIAGNOSTIC_IGNORE("-Wunknown-warning-option") - DIAGNOSTIC_IGNORE("-Wweak-vtables") - DIAGNOSTIC_IGNORE("-Wzero-as-null-pointer-constant") -#endif - -#if defined(_MSC_VER) - DIAGNOSTIC_IGNORE(4146) // unary minus operator applied to unsigned type, result still unsigned - DIAGNOSTIC_IGNORE(4800) // forcing value to bool 'true' or 'false' -#endif - +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ORC_PROTO_WRAPPER_HH +#define ORC_PROTO_WRAPPER_HH + +#include "Adaptor.hh" + +DIAGNOSTIC_PUSH + +#if defined(__GNUC__) || defined(__clang__) + DIAGNOSTIC_IGNORE("-Wconversion") + DIAGNOSTIC_IGNORE("-Wdeprecated") + DIAGNOSTIC_IGNORE("-Wsign-conversion") + DIAGNOSTIC_IGNORE("-Wunused-parameter") +#endif + +#ifdef __clang__ + DIAGNOSTIC_IGNORE("-Wnested-anon-types") + DIAGNOSTIC_IGNORE("-Wreserved-id-macro") + DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") + DIAGNOSTIC_IGNORE("-Wunknown-warning-option") + DIAGNOSTIC_IGNORE("-Wweak-vtables") + DIAGNOSTIC_IGNORE("-Wzero-as-null-pointer-constant") +#endif + +#if defined(_MSC_VER) + DIAGNOSTIC_IGNORE(4146) // unary minus operator applied to unsigned type, result still unsigned + DIAGNOSTIC_IGNORE(4800) // forcing value to bool 'true' or 'false' +#endif + #include "contrib/libs/apache/orc/proto/orc_proto.pb.h" - -DIAGNOSTIC_POP - -#endif + +DIAGNOSTIC_POP + +#endif diff --git a/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h index 497ae6f508..aeab0f0033 100644 --- a/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h +++ b/contrib/libs/apache/orc/c++/src/wrap/snappy-wrapper.h @@ -1,30 +1,30 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SNAPPY_WRAPPER_HH -#define SNAPPY_WRAPPER_HH - -#include "Adaptor.hh" - -DIAGNOSTIC_PUSH - -#ifdef __clang__ - DIAGNOSTIC_IGNORE("-Wreserved-id-macro") -#endif - -#include <snappy.h> - -DIAGNOSTIC_POP - -#endif +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SNAPPY_WRAPPER_HH +#define SNAPPY_WRAPPER_HH + +#include "Adaptor.hh" + +DIAGNOSTIC_PUSH + +#ifdef __clang__ + DIAGNOSTIC_IGNORE("-Wreserved-id-macro") +#endif + +#include <snappy.h> + +DIAGNOSTIC_POP + +#endif diff --git a/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h b/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h index 7cf1491d3d..1af0bd002d 100644 --- a/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h +++ b/contrib/libs/apache/orc/c++/src/wrap/zero-copy-stream-wrapper.h @@ -1,36 +1,36 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ZERO_COPY_STREAM_WRAPPER_HH
-#define ZERO_COPY_STREAM_WRAPPER_HH
-
-#include "Adaptor.hh"
-
-DIAGNOSTIC_PUSH
-
-#if defined(__GNUC__) || defined(__clang__)
-  DIAGNOSTIC_IGNORE("-Wdeprecated")
-  DIAGNOSTIC_IGNORE("-Wpadded")
-  DIAGNOSTIC_IGNORE("-Wunused-parameter")
-#endif
-
-#ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
-#endif
-
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZERO_COPY_STREAM_WRAPPER_HH
+#define ZERO_COPY_STREAM_WRAPPER_HH
+
+#include "Adaptor.hh"
+
+DIAGNOSTIC_PUSH
+
+#if defined(__GNUC__) || defined(__clang__)
+  DIAGNOSTIC_IGNORE("-Wdeprecated")
+  DIAGNOSTIC_IGNORE("-Wpadded")
+  DIAGNOSTIC_IGNORE("-Wunused-parameter")
+#endif
+
+#ifdef __clang__
+  DIAGNOSTIC_IGNORE("-Wreserved-id-macro")
+#endif
+
 #include <google/protobuf/io/zero_copy_stream.h>
-
-DIAGNOSTIC_POP
-
-#endif
+
+DIAGNOSTIC_POP
+
+#endif
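
For orientation, the seekable-stream contract restored above in c++/src/io/InputStream.{hh,cc} can be exercised in isolation. The following is a minimal sketch, assuming a translation unit compiled with c++/src and c++/include on the include path (so "io/InputStream.hh" resolves) and linked against the orc core library; the standalone main(), the sample byte string, and the chosen block size are illustrative assumptions, not code from this commit.

#include "io/InputStream.hh"

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <list>

int main() {
  const char text[] = "0123456789ABCDEF";
  // A block size of 4 makes Next() hand out the backing array in 4-byte slices.
  orc::SeekableArrayInputStream stream(text, sizeof(text) - 1, 4);

  const void* chunk = nullptr;
  int size = 0;
  bool ok = stream.Next(&chunk, &size);        // aliases "0123" in place, no copy
  assert(ok && size == 4 && std::memcmp(chunk, "0123", 4) == 0);

  stream.BackUp(2);                            // return the last 2 bytes to the stream
  assert(stream.ByteCount() == 2);             // ByteCount() reports net bytes consumed

  // seek() consumes one recorded position; for this stream it is a plain byte offset.
  std::list<uint64_t> positions = {8};
  orc::PositionProvider provider(positions);
  stream.seek(provider);
  ok = stream.Next(&chunk, &size);             // next slice starts at offset 8
  assert(ok && std::memcmp(chunk, "89AB", 4) == 0);

  std::cout << stream.getName() << "\n";       // "SeekableArrayInputStream 12 of 16"
  return 0;
}

SeekableFileInputStream follows the same Next/BackUp/Skip/seek contract; the difference visible in the hunks above is that it reads each block through InputStream::read() into a pool-allocated DataBuffer instead of handing out pointers into caller-owned memory.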