aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/orc/c++/src/Statistics.cc
diff options
context:
space:
mode:
authoriaz1607 <iaz1607@yandex-team.ru>2022-02-10 16:45:37 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:37 +0300
commite5437feb4ac2d2dc044e1090b9312dde5ef197e0 (patch)
treef5a238c69dd20a1fa2092127a31b8aff25020f7d /contrib/libs/apache/orc/c++/src/Statistics.cc
parentf4945d0a44b8770f0801de3056aa41639b0b7bd2 (diff)
downloadydb-e5437feb4ac2d2dc044e1090b9312dde5ef197e0.tar.gz
Restoring authorship annotation for <iaz1607@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/apache/orc/c++/src/Statistics.cc')
-rw-r--r--contrib/libs/apache/orc/c++/src/Statistics.cc816
1 files changed, 408 insertions, 408 deletions
diff --git a/contrib/libs/apache/orc/c++/src/Statistics.cc b/contrib/libs/apache/orc/c++/src/Statistics.cc
index 2401f5e0cb..f13381b5b0 100644
--- a/contrib/libs/apache/orc/c++/src/Statistics.cc
+++ b/contrib/libs/apache/orc/c++/src/Statistics.cc
@@ -1,408 +1,408 @@
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "orc/Exceptions.hh"
-#include "RLE.hh"
-#include "Statistics.hh"
-
-#include "wrap/coded-stream-wrapper.h"
-
-namespace orc {
-
- ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
- const StatContext& statContext) {
- if (s.has_intstatistics()) {
- return new IntegerColumnStatisticsImpl(s);
- } else if (s.has_doublestatistics()) {
- return new DoubleColumnStatisticsImpl(s);
- } else if (s.has_stringstatistics()) {
- return new StringColumnStatisticsImpl(s, statContext);
- } else if (s.has_bucketstatistics()) {
- return new BooleanColumnStatisticsImpl(s, statContext);
- } else if (s.has_decimalstatistics()) {
- return new DecimalColumnStatisticsImpl(s, statContext);
- } else if (s.has_timestampstatistics()) {
- return new TimestampColumnStatisticsImpl(s, statContext);
- } else if (s.has_datestatistics()) {
- return new DateColumnStatisticsImpl(s, statContext);
- } else if (s.has_binarystatistics()) {
- return new BinaryColumnStatisticsImpl(s, statContext);
- } else {
- return new ColumnStatisticsImpl(s);
- }
- }
-
- StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats,
- const StatContext& statContext) {
- for(int i = 0; i < stripeStats.colstats_size(); i++) {
- colStats.push_back(
- convertColumnStatistics(stripeStats.colstats(i), statContext));
- }
- }
-
- StatisticsImpl::StatisticsImpl(const proto::Footer& footer,
- const StatContext& statContext) {
- for(int i = 0; i < footer.statistics_size(); i++) {
- colStats.push_back(
- convertColumnStatistics(footer.statistics(i), statContext));
- }
- }
-
- StatisticsImpl::~StatisticsImpl() {
- for(std::vector<ColumnStatistics*>::iterator ptr = colStats.begin();
- ptr != colStats.end();
- ++ptr) {
- delete *ptr;
- }
- }
-
- Statistics::~Statistics() {
- // PASS
- }
-
- StripeStatistics::~StripeStatistics() {
- // PASS
- }
-
- StripeStatisticsImpl::~StripeStatisticsImpl() {
- // PASS
- }
-
- StripeStatisticsImpl::StripeStatisticsImpl(
- const proto::StripeStatistics& stripeStats,
- std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
- const StatContext& statContext) {
- columnStats.reset(new StatisticsImpl(stripeStats, statContext));
- rowIndexStats.resize(indexStats.size());
- for(size_t i = 0; i < rowIndexStats.size(); i++) {
- for(size_t j = 0; j < indexStats[i].size(); j++) {
- rowIndexStats[i].push_back(
- std::shared_ptr<const ColumnStatistics>(
- convertColumnStatistics(indexStats[i][j], statContext)));
- }
- }
- }
-
-
- ColumnStatistics::~ColumnStatistics() {
- // PASS
- }
-
- BinaryColumnStatistics::~BinaryColumnStatistics() {
- // PASS
- }
-
- BooleanColumnStatistics::~BooleanColumnStatistics() {
- // PASS
- }
-
- DateColumnStatistics::~DateColumnStatistics() {
- // PASS
- }
-
- DecimalColumnStatistics::~DecimalColumnStatistics() {
- // PASS
- }
-
- DoubleColumnStatistics::~DoubleColumnStatistics() {
- // PASS
- }
-
- IntegerColumnStatistics::~IntegerColumnStatistics() {
- // PASS
- }
-
- StringColumnStatistics::~StringColumnStatistics() {
- // PASS
- }
-
- TimestampColumnStatistics::~TimestampColumnStatistics() {
- // PASS
- }
-
- MutableColumnStatistics::~MutableColumnStatistics() {
- // PASS
- }
-
- ColumnStatisticsImpl::~ColumnStatisticsImpl() {
- // PASS
- }
-
- BinaryColumnStatisticsImpl::~BinaryColumnStatisticsImpl() {
- // PASS
- }
-
- BooleanColumnStatisticsImpl::~BooleanColumnStatisticsImpl() {
- // PASS
- }
-
- DateColumnStatisticsImpl::~DateColumnStatisticsImpl() {
- // PASS
- }
-
- DecimalColumnStatisticsImpl::~DecimalColumnStatisticsImpl() {
- // PASS
- }
-
- DoubleColumnStatisticsImpl::~DoubleColumnStatisticsImpl() {
- // PASS
- }
-
- IntegerColumnStatisticsImpl::~IntegerColumnStatisticsImpl() {
- // PASS
- }
-
- StringColumnStatisticsImpl::~StringColumnStatisticsImpl() {
- // PASS
- }
-
- TimestampColumnStatisticsImpl::~TimestampColumnStatisticsImpl() {
- // PASS
- }
-
- ColumnStatisticsImpl::ColumnStatisticsImpl
- (const proto::ColumnStatistics& pb) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- }
-
- BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_binarystatistics() && statContext.correctStats) {
- _stats.setHasTotalLength(pb.binarystatistics().has_sum());
- _stats.setTotalLength(
- static_cast<uint64_t>(pb.binarystatistics().sum()));
- }
- }
-
- BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_bucketstatistics() && statContext.correctStats) {
- _hasCount = true;
- _trueCount = pb.bucketstatistics().count(0);
- } else {
- _hasCount = false;
- _trueCount = 0;
- }
- }
-
- DateColumnStatisticsImpl::DateColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_datestatistics() || !statContext.correctStats) {
- // hasMinimum_ is false by default;
- // hasMaximum_ is false by default;
- _stats.setMinimum(0);
- _stats.setMaximum(0);
- } else {
- _stats.setHasMinimum(pb.datestatistics().has_minimum());
- _stats.setHasMaximum(pb.datestatistics().has_maximum());
- _stats.setMinimum(pb.datestatistics().minimum());
- _stats.setMaximum(pb.datestatistics().maximum());
- }
- }
-
- DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_decimalstatistics() && statContext.correctStats) {
- const proto::DecimalStatistics& stats = pb.decimalstatistics();
- _stats.setHasMinimum(stats.has_minimum());
- _stats.setHasMaximum(stats.has_maximum());
- _stats.setHasSum(stats.has_sum());
-
- _stats.setMinimum(Decimal(stats.minimum()));
- _stats.setMaximum(Decimal(stats.maximum()));
- _stats.setSum(Decimal(stats.sum()));
- }
- }
-
- DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl
- (const proto::ColumnStatistics& pb){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_doublestatistics()) {
- _stats.setMinimum(0);
- _stats.setMaximum(0);
- _stats.setSum(0);
- }else{
- const proto::DoubleStatistics& stats = pb.doublestatistics();
- _stats.setHasMinimum(stats.has_minimum());
- _stats.setHasMaximum(stats.has_maximum());
- _stats.setHasSum(stats.has_sum());
-
- _stats.setMinimum(stats.minimum());
- _stats.setMaximum(stats.maximum());
- _stats.setSum(stats.sum());
- }
- }
-
- IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl
- (const proto::ColumnStatistics& pb){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_intstatistics()) {
- _stats.setMinimum(0);
- _stats.setMaximum(0);
- _stats.setSum(0);
- }else{
- const proto::IntegerStatistics& stats = pb.intstatistics();
- _stats.setHasMinimum(stats.has_minimum());
- _stats.setHasMaximum(stats.has_maximum());
- _stats.setHasSum(stats.has_sum());
-
- _stats.setMinimum(stats.minimum());
- _stats.setMaximum(stats.maximum());
- _stats.setSum(stats.sum());
- }
- }
-
- StringColumnStatisticsImpl::StringColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext){
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_stringstatistics() || !statContext.correctStats) {
- _stats.setTotalLength(0);
- }else{
- const proto::StringStatistics& stats = pb.stringstatistics();
- _stats.setHasMinimum(stats.has_minimum());
- _stats.setHasMaximum(stats.has_maximum());
- _stats.setHasTotalLength(stats.has_sum());
-
- _stats.setMinimum(stats.minimum());
- _stats.setMaximum(stats.maximum());
- _stats.setTotalLength(static_cast<uint64_t>(stats.sum()));
- }
- }
-
- TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, const StatContext& statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_timestampstatistics() || !statContext.correctStats) {
- _stats.setMinimum(0);
- _stats.setMaximum(0);
- _lowerBound = 0;
- _upperBound = 0;
- }else{
- const proto::TimestampStatistics& stats = pb.timestampstatistics();
- _stats.setHasMinimum(
- stats.has_minimumutc() ||
- (stats.has_minimum() && (statContext.writerTimezone != nullptr)));
- _stats.setHasMaximum(
- stats.has_maximumutc() ||
- (stats.has_maximum() && (statContext.writerTimezone != nullptr)));
- _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
- _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
-
- // Timestamp stats are stored in milliseconds
- if (stats.has_minimumutc()) {
- int64_t minimum = stats.minimumutc();
- _stats.setMinimum(minimum);
- _lowerBound = minimum;
- } else if (statContext.writerTimezone) {
- int64_t writerTimeSec = stats.minimum() / 1000;
- // multiply the offset by 1000 to convert to millisecond
- int64_t minimum =
- stats.minimum() +
- (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
- * 1000;
- _stats.setMinimum(minimum);
- _lowerBound = minimum;
- } else {
- _stats.setMinimum(0);
- // subtract 1 day 1 hour (25 hours) in milliseconds to handle unknown
- // TZ and daylight savings
- _lowerBound = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000);
- }
-
- // Timestamp stats are stored in milliseconds
- if (stats.has_maximumutc()) {
- int64_t maximum = stats.maximumutc();
- _stats.setMaximum(maximum);
- _upperBound = maximum;
- } else if (statContext.writerTimezone) {
- int64_t writerTimeSec = stats.maximum() / 1000;
- // multiply the offset by 1000 to convert to millisecond
- int64_t maximum = stats.maximum() +
- (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
- * 1000;
- _stats.setMaximum(maximum);
- _upperBound = maximum;
- } else {
- _stats.setMaximum(0);
- // add 1 day 1 hour (25 hours) in milliseconds to handle unknown
- // TZ and daylight savings
- _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000);
- }
- // Add 1 millisecond to account for microsecond precision of values
- _upperBound += 1;
- }
- }
-
- std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
- const Type& type) {
- switch (static_cast<int64_t>(type.getKind())) {
- case BOOLEAN:
- return std::unique_ptr<MutableColumnStatistics>(
- new BooleanColumnStatisticsImpl());
- case BYTE:
- case INT:
- case LONG:
- case SHORT:
- return std::unique_ptr<MutableColumnStatistics>(
- new IntegerColumnStatisticsImpl());
- case STRUCT:
- case MAP:
- case LIST:
- case UNION:
- return std::unique_ptr<MutableColumnStatistics>(
- new ColumnStatisticsImpl());
- case FLOAT:
- case DOUBLE:
- return std::unique_ptr<MutableColumnStatistics>(
- new DoubleColumnStatisticsImpl());
- case BINARY:
- return std::unique_ptr<MutableColumnStatistics>(
- new BinaryColumnStatisticsImpl());
- case STRING:
- case CHAR:
- case VARCHAR:
- return std::unique_ptr<MutableColumnStatistics>(
- new StringColumnStatisticsImpl());
- case DATE:
- return std::unique_ptr<MutableColumnStatistics>(
- new DateColumnStatisticsImpl());
- case TIMESTAMP:
- return std::unique_ptr<MutableColumnStatistics>(
- new TimestampColumnStatisticsImpl());
- case DECIMAL:
- return std::unique_ptr<MutableColumnStatistics>(
- new DecimalColumnStatisticsImpl());
- default:
- throw NotImplementedYet("Not supported type: " + type.toString());
- }
- }
-
-}// namespace
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/Exceptions.hh"
+#include "RLE.hh"
+#include "Statistics.hh"
+
+#include "wrap/coded-stream-wrapper.h"
+
+namespace orc {
+
+ ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
+ const StatContext& statContext) {
+ if (s.has_intstatistics()) {
+ return new IntegerColumnStatisticsImpl(s);
+ } else if (s.has_doublestatistics()) {
+ return new DoubleColumnStatisticsImpl(s);
+ } else if (s.has_stringstatistics()) {
+ return new StringColumnStatisticsImpl(s, statContext);
+ } else if (s.has_bucketstatistics()) {
+ return new BooleanColumnStatisticsImpl(s, statContext);
+ } else if (s.has_decimalstatistics()) {
+ return new DecimalColumnStatisticsImpl(s, statContext);
+ } else if (s.has_timestampstatistics()) {
+ return new TimestampColumnStatisticsImpl(s, statContext);
+ } else if (s.has_datestatistics()) {
+ return new DateColumnStatisticsImpl(s, statContext);
+ } else if (s.has_binarystatistics()) {
+ return new BinaryColumnStatisticsImpl(s, statContext);
+ } else {
+ return new ColumnStatisticsImpl(s);
+ }
+ }
+
+ StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats,
+ const StatContext& statContext) {
+ for(int i = 0; i < stripeStats.colstats_size(); i++) {
+ colStats.push_back(
+ convertColumnStatistics(stripeStats.colstats(i), statContext));
+ }
+ }
+
+ StatisticsImpl::StatisticsImpl(const proto::Footer& footer,
+ const StatContext& statContext) {
+ for(int i = 0; i < footer.statistics_size(); i++) {
+ colStats.push_back(
+ convertColumnStatistics(footer.statistics(i), statContext));
+ }
+ }
+
+ StatisticsImpl::~StatisticsImpl() {
+ for(std::vector<ColumnStatistics*>::iterator ptr = colStats.begin();
+ ptr != colStats.end();
+ ++ptr) {
+ delete *ptr;
+ }
+ }
+
+ Statistics::~Statistics() {
+ // PASS
+ }
+
+ StripeStatistics::~StripeStatistics() {
+ // PASS
+ }
+
+ StripeStatisticsImpl::~StripeStatisticsImpl() {
+ // PASS
+ }
+
+ StripeStatisticsImpl::StripeStatisticsImpl(
+ const proto::StripeStatistics& stripeStats,
+ std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
+ const StatContext& statContext) {
+ columnStats.reset(new StatisticsImpl(stripeStats, statContext));
+ rowIndexStats.resize(indexStats.size());
+ for(size_t i = 0; i < rowIndexStats.size(); i++) {
+ for(size_t j = 0; j < indexStats[i].size(); j++) {
+ rowIndexStats[i].push_back(
+ std::shared_ptr<const ColumnStatistics>(
+ convertColumnStatistics(indexStats[i][j], statContext)));
+ }
+ }
+ }
+
+
+ ColumnStatistics::~ColumnStatistics() {
+ // PASS
+ }
+
+ BinaryColumnStatistics::~BinaryColumnStatistics() {
+ // PASS
+ }
+
+ BooleanColumnStatistics::~BooleanColumnStatistics() {
+ // PASS
+ }
+
+ DateColumnStatistics::~DateColumnStatistics() {
+ // PASS
+ }
+
+ DecimalColumnStatistics::~DecimalColumnStatistics() {
+ // PASS
+ }
+
+ DoubleColumnStatistics::~DoubleColumnStatistics() {
+ // PASS
+ }
+
+ IntegerColumnStatistics::~IntegerColumnStatistics() {
+ // PASS
+ }
+
+ StringColumnStatistics::~StringColumnStatistics() {
+ // PASS
+ }
+
+ TimestampColumnStatistics::~TimestampColumnStatistics() {
+ // PASS
+ }
+
+ MutableColumnStatistics::~MutableColumnStatistics() {
+ // PASS
+ }
+
+ ColumnStatisticsImpl::~ColumnStatisticsImpl() {
+ // PASS
+ }
+
+ BinaryColumnStatisticsImpl::~BinaryColumnStatisticsImpl() {
+ // PASS
+ }
+
+ BooleanColumnStatisticsImpl::~BooleanColumnStatisticsImpl() {
+ // PASS
+ }
+
+ DateColumnStatisticsImpl::~DateColumnStatisticsImpl() {
+ // PASS
+ }
+
+ DecimalColumnStatisticsImpl::~DecimalColumnStatisticsImpl() {
+ // PASS
+ }
+
+ DoubleColumnStatisticsImpl::~DoubleColumnStatisticsImpl() {
+ // PASS
+ }
+
+ IntegerColumnStatisticsImpl::~IntegerColumnStatisticsImpl() {
+ // PASS
+ }
+
+ StringColumnStatisticsImpl::~StringColumnStatisticsImpl() {
+ // PASS
+ }
+
+ TimestampColumnStatisticsImpl::~TimestampColumnStatisticsImpl() {
+ // PASS
+ }
+
+ ColumnStatisticsImpl::ColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb) {
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ }
+
+ BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (pb.has_binarystatistics() && statContext.correctStats) {
+ _stats.setHasTotalLength(pb.binarystatistics().has_sum());
+ _stats.setTotalLength(
+ static_cast<uint64_t>(pb.binarystatistics().sum()));
+ }
+ }
+
+ BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (pb.has_bucketstatistics() && statContext.correctStats) {
+ _hasCount = true;
+ _trueCount = pb.bucketstatistics().count(0);
+ } else {
+ _hasCount = false;
+ _trueCount = 0;
+ }
+ }
+
+ DateColumnStatisticsImpl::DateColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (!pb.has_datestatistics() || !statContext.correctStats) {
+ // hasMinimum_ is false by default;
+ // hasMaximum_ is false by default;
+ _stats.setMinimum(0);
+ _stats.setMaximum(0);
+ } else {
+ _stats.setHasMinimum(pb.datestatistics().has_minimum());
+ _stats.setHasMaximum(pb.datestatistics().has_maximum());
+ _stats.setMinimum(pb.datestatistics().minimum());
+ _stats.setMaximum(pb.datestatistics().maximum());
+ }
+ }
+
+ DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (pb.has_decimalstatistics() && statContext.correctStats) {
+ const proto::DecimalStatistics& stats = pb.decimalstatistics();
+ _stats.setHasMinimum(stats.has_minimum());
+ _stats.setHasMaximum(stats.has_maximum());
+ _stats.setHasSum(stats.has_sum());
+
+ _stats.setMinimum(Decimal(stats.minimum()));
+ _stats.setMaximum(Decimal(stats.maximum()));
+ _stats.setSum(Decimal(stats.sum()));
+ }
+ }
+
+ DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (!pb.has_doublestatistics()) {
+ _stats.setMinimum(0);
+ _stats.setMaximum(0);
+ _stats.setSum(0);
+ }else{
+ const proto::DoubleStatistics& stats = pb.doublestatistics();
+ _stats.setHasMinimum(stats.has_minimum());
+ _stats.setHasMaximum(stats.has_maximum());
+ _stats.setHasSum(stats.has_sum());
+
+ _stats.setMinimum(stats.minimum());
+ _stats.setMaximum(stats.maximum());
+ _stats.setSum(stats.sum());
+ }
+ }
+
+ IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (!pb.has_intstatistics()) {
+ _stats.setMinimum(0);
+ _stats.setMaximum(0);
+ _stats.setSum(0);
+ }else{
+ const proto::IntegerStatistics& stats = pb.intstatistics();
+ _stats.setHasMinimum(stats.has_minimum());
+ _stats.setHasMaximum(stats.has_maximum());
+ _stats.setHasSum(stats.has_sum());
+
+ _stats.setMinimum(stats.minimum());
+ _stats.setMaximum(stats.maximum());
+ _stats.setSum(stats.sum());
+ }
+ }
+
+ StringColumnStatisticsImpl::StringColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext){
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (!pb.has_stringstatistics() || !statContext.correctStats) {
+ _stats.setTotalLength(0);
+ }else{
+ const proto::StringStatistics& stats = pb.stringstatistics();
+ _stats.setHasMinimum(stats.has_minimum());
+ _stats.setHasMaximum(stats.has_maximum());
+ _stats.setHasTotalLength(stats.has_sum());
+
+ _stats.setMinimum(stats.minimum());
+ _stats.setMaximum(stats.maximum());
+ _stats.setTotalLength(static_cast<uint64_t>(stats.sum()));
+ }
+ }
+
+ TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
+ (const proto::ColumnStatistics& pb, const StatContext& statContext) {
+ _stats.setNumberOfValues(pb.numberofvalues());
+ _stats.setHasNull(pb.hasnull());
+ if (!pb.has_timestampstatistics() || !statContext.correctStats) {
+ _stats.setMinimum(0);
+ _stats.setMaximum(0);
+ _lowerBound = 0;
+ _upperBound = 0;
+ }else{
+ const proto::TimestampStatistics& stats = pb.timestampstatistics();
+ _stats.setHasMinimum(
+ stats.has_minimumutc() ||
+ (stats.has_minimum() && (statContext.writerTimezone != nullptr)));
+ _stats.setHasMaximum(
+ stats.has_maximumutc() ||
+ (stats.has_maximum() && (statContext.writerTimezone != nullptr)));
+ _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
+ _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
+
+ // Timestamp stats are stored in milliseconds
+ if (stats.has_minimumutc()) {
+ int64_t minimum = stats.minimumutc();
+ _stats.setMinimum(minimum);
+ _lowerBound = minimum;
+ } else if (statContext.writerTimezone) {
+ int64_t writerTimeSec = stats.minimum() / 1000;
+ // multiply the offset by 1000 to convert to millisecond
+ int64_t minimum =
+ stats.minimum() +
+ (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
+ * 1000;
+ _stats.setMinimum(minimum);
+ _lowerBound = minimum;
+ } else {
+ _stats.setMinimum(0);
+ // subtract 1 day 1 hour (25 hours) in milliseconds to handle unknown
+ // TZ and daylight savings
+ _lowerBound = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000);
+ }
+
+ // Timestamp stats are stored in milliseconds
+ if (stats.has_maximumutc()) {
+ int64_t maximum = stats.maximumutc();
+ _stats.setMaximum(maximum);
+ _upperBound = maximum;
+ } else if (statContext.writerTimezone) {
+ int64_t writerTimeSec = stats.maximum() / 1000;
+ // multiply the offset by 1000 to convert to millisecond
+ int64_t maximum = stats.maximum() +
+ (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset)
+ * 1000;
+ _stats.setMaximum(maximum);
+ _upperBound = maximum;
+ } else {
+ _stats.setMaximum(0);
+ // add 1 day 1 hour (25 hours) in milliseconds to handle unknown
+ // TZ and daylight savings
+ _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000);
+ }
+ // Add 1 millisecond to account for microsecond precision of values
+ _upperBound += 1;
+ }
+ }
+
+ std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
+ const Type& type) {
+ switch (static_cast<int64_t>(type.getKind())) {
+ case BOOLEAN:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new BooleanColumnStatisticsImpl());
+ case BYTE:
+ case INT:
+ case LONG:
+ case SHORT:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new IntegerColumnStatisticsImpl());
+ case STRUCT:
+ case MAP:
+ case LIST:
+ case UNION:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new ColumnStatisticsImpl());
+ case FLOAT:
+ case DOUBLE:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new DoubleColumnStatisticsImpl());
+ case BINARY:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new BinaryColumnStatisticsImpl());
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new StringColumnStatisticsImpl());
+ case DATE:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new DateColumnStatisticsImpl());
+ case TIMESTAMP:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new TimestampColumnStatisticsImpl());
+ case DECIMAL:
+ return std::unique_ptr<MutableColumnStatistics>(
+ new DecimalColumnStatisticsImpl());
+ default:
+ throw NotImplementedYet("Not supported type: " + type.toString());
+ }
+ }
+
+}// namespace