diff options
author | iaz1607 <iaz1607@yandex-team.ru> | 2022-02-10 16:45:37 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:37 +0300 |
commit | e5437feb4ac2d2dc044e1090b9312dde5ef197e0 (patch) | |
tree | f5a238c69dd20a1fa2092127a31b8aff25020f7d /contrib/libs/apache/orc/c++/src/Statistics.hh | |
parent | f4945d0a44b8770f0801de3056aa41639b0b7bd2 (diff) | |
download | ydb-e5437feb4ac2d2dc044e1090b9312dde5ef197e0.tar.gz |
Restoring authorship annotation for <iaz1607@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/apache/orc/c++/src/Statistics.hh')
-rw-r--r-- | contrib/libs/apache/orc/c++/src/Statistics.hh | 2906 |
1 files changed, 1453 insertions, 1453 deletions
diff --git a/contrib/libs/apache/orc/c++/src/Statistics.hh b/contrib/libs/apache/orc/c++/src/Statistics.hh index ee9db23f86..849019d8d7 100644 --- a/contrib/libs/apache/orc/c++/src/Statistics.hh +++ b/contrib/libs/apache/orc/c++/src/Statistics.hh @@ -1,971 +1,971 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ORC_STATISTICS_IMPL_HH -#define ORC_STATISTICS_IMPL_HH - -#include "orc/Common.hh" -#include "orc/Int128.hh" -#include "orc/OrcFile.hh" -#include "orc/Reader.hh" - -#include "Timezone.hh" -#include "TypeImpl.hh" - -namespace orc { - -/** - * StatContext contains fields required to compute statistics - */ - - struct StatContext { - const bool correctStats; - const Timezone* const writerTimezone; - StatContext() : correctStats(false), writerTimezone(nullptr) {} - StatContext(bool cStat, const Timezone* const timezone = nullptr) : - correctStats(cStat), writerTimezone(timezone) {} - }; - -/** - * Internal Statistics Implementation - */ - - template <typename T> - class InternalStatisticsImpl { - private: - bool _hasNull; - bool _hasMinimum; - bool _hasMaximum; - bool _hasSum; - bool _hasTotalLength; - uint64_t _totalLength; - uint64_t _valueCount; - T _minimum; - T _maximum; - T _sum; - public: - InternalStatisticsImpl() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount = 0; - } - - ~InternalStatisticsImpl() {} - - // GET / SET _totalLength - bool hasTotalLength() const { return _hasTotalLength; } - - void setHasTotalLength(bool hasTotalLength) { - _hasTotalLength = hasTotalLength; - } - - uint64_t getTotalLength() const { return _totalLength; } - - void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; } - - // GET / SET _sum - bool hasSum() const { return _hasSum; } - - void setHasSum(bool hasSum) { _hasSum = hasSum; } - - T getSum() const { return _sum; } - - void setSum(T sum) { _sum = sum; } - - // GET / SET _maximum - bool hasMaximum() const { return _hasMaximum; } - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_STATISTICS_IMPL_HH +#define ORC_STATISTICS_IMPL_HH + +#include "orc/Common.hh" +#include "orc/Int128.hh" +#include "orc/OrcFile.hh" +#include "orc/Reader.hh" + +#include "Timezone.hh" +#include "TypeImpl.hh" + +namespace orc { + +/** + * StatContext contains fields required to compute statistics + */ + + struct StatContext { + const bool correctStats; + const Timezone* const writerTimezone; + StatContext() : correctStats(false), writerTimezone(nullptr) {} + StatContext(bool cStat, const Timezone* const timezone = nullptr) : + correctStats(cStat), writerTimezone(timezone) {} + }; + +/** + * Internal Statistics Implementation + */ + + template <typename T> + class InternalStatisticsImpl { + private: + bool _hasNull; + bool _hasMinimum; + bool _hasMaximum; + bool _hasSum; + bool _hasTotalLength; + uint64_t _totalLength; + uint64_t _valueCount; + T _minimum; + T _maximum; + T _sum; + public: + InternalStatisticsImpl() { + _hasNull = false; + _hasMinimum = false; + _hasMaximum = false; + _hasSum = false; + _hasTotalLength = false; + _totalLength = 0; + _valueCount = 0; + } + + ~InternalStatisticsImpl() {} + + // GET / SET _totalLength + bool hasTotalLength() const { return _hasTotalLength; } + + void setHasTotalLength(bool hasTotalLength) { + _hasTotalLength = hasTotalLength; + } + + uint64_t getTotalLength() const { return _totalLength; } + + void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; } + + // GET / SET _sum + bool hasSum() const { return _hasSum; } + + void setHasSum(bool hasSum) { _hasSum = hasSum; } + + T getSum() const { return _sum; } + + void setSum(T sum) { _sum = sum; } + + // GET / SET _maximum + bool hasMaximum() const { return _hasMaximum; } + const T & getMaximum() const { return _maximum; } - - void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; } - - void setMaximum(T max) { _maximum = max; } - - // GET / SET _minimum - bool hasMinimum() const { return _hasMinimum; } - - void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; } - + + void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; } + + void setMaximum(T max) { _maximum = max; } + + // GET / SET _minimum + bool hasMinimum() const { return _hasMinimum; } + + void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; } + const T & getMinimum() const { return _minimum; } - - void setMinimum(T min) { _minimum = min; } - - // GET / SET _valueCount - uint64_t getNumberOfValues() const { return _valueCount; } - - void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; } - - // GET / SET _hasNullValue - bool hasNull() const { return _hasNull; } - - void setHasNull(bool hasNull) { _hasNull = hasNull; } - - void reset() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount = 0; - } - - void updateMinMax(T value) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = _maximum = value; - } else if (compare(value, _minimum)) { - _minimum = value; - } else if (compare(_maximum, value)) { - _maximum = value; - } - } - - // sum is not merged here as we need to check overflow - void merge(const InternalStatisticsImpl& other) { - _hasNull = _hasNull || other._hasNull; - _valueCount += other._valueCount; - - if (other._hasMinimum) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = other._minimum; - _maximum = other._maximum; - } else { - // all template types should support operator< - if (compare(_maximum, other._maximum)) { - _maximum = other._maximum; - } - if (compare(other._minimum, _minimum)) { - _minimum = other._minimum; - } - } - } - - _hasTotalLength = _hasTotalLength && other._hasTotalLength; - _totalLength += other._totalLength; - } - }; - - typedef InternalStatisticsImpl<char> InternalCharStatistics; - typedef InternalStatisticsImpl<char> InternalBooleanStatistics; - typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics; - typedef InternalStatisticsImpl<int32_t> InternalDateStatistics; - typedef InternalStatisticsImpl<double> InternalDoubleStatistics; - typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics; - typedef InternalStatisticsImpl<std::string> InternalStringStatistics; - - /** - * Mutable column statistics for use by the writer. - */ - class MutableColumnStatistics { - public: - virtual ~MutableColumnStatistics(); - - virtual void increase(uint64_t count) = 0; - - virtual void setNumberOfValues(uint64_t value) = 0; - - virtual void setHasNull(bool hasNull) = 0; - - virtual void merge(const MutableColumnStatistics& other) = 0; - - virtual void reset() = 0; - - virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0; - }; - -/** - * ColumnStatistics Implementation - */ - - class ColumnStatisticsImpl: public ColumnStatistics, - public MutableColumnStatistics { - private: - InternalCharStatistics _stats; - public: - ColumnStatisticsImpl() { reset(); } - ColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~ColumnStatisticsImpl() override; - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - void merge(const MutableColumnStatistics& other) override { - _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Column has " << getNumberOfValues() << " values" - << " and has null value: " << (hasNull() ? "yes" : "no") - << std::endl; - return buffer.str(); - } - }; - - class BinaryColumnStatisticsImpl: public BinaryColumnStatistics, - public MutableColumnStatistics { - private: - InternalCharStatistics _stats; - public: - BinaryColumnStatisticsImpl() { reset(); } - BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~BinaryColumnStatisticsImpl() override; - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - bool hasTotalLength() const override { - return _stats.hasTotalLength(); - } - - uint64_t getTotalLength() const override { - if(hasTotalLength()){ - return _stats.getTotalLength(); - }else{ - throw ParseError("Total length is not defined."); - } - } - - void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); - } - - void update(size_t length) { - _stats.setTotalLength(_stats.getTotalLength() + length); - } - - void merge(const MutableColumnStatistics& other) override { - const BinaryColumnStatisticsImpl& binStats = - dynamic_cast<const BinaryColumnStatisticsImpl&>(other); - _stats.merge(binStats._stats); - } - - void reset() override { - _stats.reset(); - setTotalLength(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); - binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Binary" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasTotalLength()){ - buffer << "Total length: " << getTotalLength() << std::endl; - }else{ - buffer << "Total length: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class BooleanColumnStatisticsImpl: public BooleanColumnStatistics, - public MutableColumnStatistics { - private: - InternalBooleanStatistics _stats; - bool _hasCount; - uint64_t _trueCount; - - public: - BooleanColumnStatisticsImpl() { reset(); } - BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~BooleanColumnStatisticsImpl() override; - - bool hasCount() const override { - return _hasCount; - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - _hasCount = true; - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - uint64_t getFalseCount() const override { - if(hasCount()){ - return getNumberOfValues() - _trueCount; - }else{ - throw ParseError("False count is not defined."); - } - } - - uint64_t getTrueCount() const override { - if(hasCount()){ - return _trueCount; - }else{ - throw ParseError("True count is not defined."); - } - } - - void setTrueCount(uint64_t trueCount) { - _hasCount = true; - _trueCount = trueCount; - } - - void update(bool value, size_t repetitions) { - if (value) { - _trueCount += repetitions; - } - } - - void merge(const MutableColumnStatistics& other) override { - const BooleanColumnStatisticsImpl& boolStats = - dynamic_cast<const BooleanColumnStatisticsImpl&>(other); - _stats.merge(boolStats._stats); - _hasCount = _hasCount && boolStats._hasCount; - _trueCount += boolStats._trueCount; - } - - void reset() override { - _stats.reset(); - setTrueCount(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); - if (_hasCount) { - bucketStats->add_count(_trueCount); - } else { - bucketStats->clear_count(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Boolean" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasCount()){ - buffer << "(true: " << getTrueCount() << "; false: " - << getFalseCount() << ")" << std::endl; - } else { - buffer << "(true: not defined; false: not defined)" << std::endl; - buffer << "True and false counts are not defined" << std::endl; - } - return buffer.str(); - } - }; - - class DateColumnStatisticsImpl: public DateColumnStatistics, - public MutableColumnStatistics{ - private: - InternalDateStatistics _stats; - public: - DateColumnStatisticsImpl() { reset(); } - DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~DateColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int32_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int32_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int32_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int32_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - void update(int32_t value) { - _stats.updateMinMax(value); - } - - void merge(const MutableColumnStatistics& other) override { - const DateColumnStatisticsImpl& dateStats = - dynamic_cast<const DateColumnStatisticsImpl&>(other); - _stats.merge(dateStats._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DateStatistics* dateStatistics = - pbStats.mutable_datestatistics(); - if (_stats.hasMinimum()) { - dateStatistics->set_maximum(_stats.getMaximum()); - dateStatistics->set_minimum(_stats.getMinimum()); - } else { - dateStatistics->clear_minimum(); - dateStatistics->clear_maximum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Date" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class DecimalColumnStatisticsImpl: public DecimalColumnStatistics, - public MutableColumnStatistics { - private: - InternalDecimalStatistics _stats; - - public: - DecimalColumnStatisticsImpl() { reset(); } - DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~DecimalColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - Decimal getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - Decimal getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(Decimal minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(Decimal maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - Decimal getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(Decimal sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - - void update(const Decimal& value) { - _stats.updateMinMax(value); - - if (_stats.hasSum()) { - updateSum(value); - } - } - - void merge(const MutableColumnStatistics& other) override { - const DecimalColumnStatisticsImpl& decStats = - dynamic_cast<const DecimalColumnStatisticsImpl&>(other); - - _stats.merge(decStats._stats); - - _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); - if (_stats.hasSum()) { - updateSum(decStats.getSum()); - } - } - - void reset() override { - _stats.reset(); - setSum(Decimal()); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); - if (_stats.hasMinimum()) { - decStats->set_minimum(TString(_stats.getMinimum().toString())); - decStats->set_maximum(TString(_stats.getMaximum().toString())); - } else { - decStats->clear_minimum(); - decStats->clear_maximum(); - } - if (_stats.hasSum()) { - decStats->set_sum(TString(_stats.getSum().toString())); - } else { - decStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Decimal" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum().toString() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum().toString() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum().toString() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - - return buffer.str(); - } - - private: - void updateSum(Decimal value) { - if (_stats.hasSum()) { - bool overflow = false; - Decimal sum = _stats.getSum(); - if (sum.scale > value.scale) { - value.value = scaleUpInt128ByPowerOfTen(value.value, - sum.scale - value.scale, - overflow); - } else if (sum.scale < value.scale) { - sum.value = scaleUpInt128ByPowerOfTen(sum.value, - value.scale - sum.scale, - overflow); - sum.scale = value.scale; - } - - if (!overflow) { - bool wasPositive = sum.value >= 0; - sum.value += value.value; - if ((value.value >= 0) == wasPositive) { - _stats.setHasSum((sum.value >= 0) == wasPositive); - } - } else { - _stats.setHasSum(false); - } - - if (_stats.hasSum()) { - _stats.setSum(sum); - } - } - } - }; - - class DoubleColumnStatisticsImpl: public DoubleColumnStatistics, - public MutableColumnStatistics { - private: - InternalDoubleStatistics _stats; - public: - DoubleColumnStatisticsImpl() { reset(); } - DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~DoubleColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - double getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - double getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(double minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(double maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - double getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(double sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - - void update(double value) { - _stats.updateMinMax(value); - _stats.setSum(_stats.getSum() + value); - } - - void merge(const MutableColumnStatistics& other) override { - const DoubleColumnStatisticsImpl& doubleStats = - dynamic_cast<const DoubleColumnStatisticsImpl&>(other); - _stats.merge(doubleStats._stats); - - _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); - if (_stats.hasSum()) { - _stats.setSum(_stats.getSum() + doubleStats.getSum()); - } - } - - void reset() override { - _stats.reset(); - setSum(0.0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); - if (_stats.hasMinimum()) { - doubleStats->set_minimum(_stats.getMinimum()); - doubleStats->set_maximum(_stats.getMaximum()); - } else { - doubleStats->clear_minimum(); - doubleStats->clear_maximum(); - } - if (_stats.hasSum()) { - doubleStats->set_sum(_stats.getSum()); - } else { - doubleStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Double" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class IntegerColumnStatisticsImpl: public IntegerColumnStatistics, - public MutableColumnStatistics { - private: - InternalIntegerStatistics _stats; - public: - IntegerColumnStatisticsImpl() { reset(); } - IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats); - virtual ~IntegerColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasSum() const override { - return _stats.hasSum(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int64_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int64_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - int64_t getSum() const override { - if(hasSum()){ - return _stats.getSum(); - }else{ - throw ParseError("Sum is not defined."); - } - } - - void setSum(int64_t sum) { - _stats.setHasSum(true); - _stats.setSum(sum); - } - + + void setMinimum(T min) { _minimum = min; } + + // GET / SET _valueCount + uint64_t getNumberOfValues() const { return _valueCount; } + + void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; } + + // GET / SET _hasNullValue + bool hasNull() const { return _hasNull; } + + void setHasNull(bool hasNull) { _hasNull = hasNull; } + + void reset() { + _hasNull = false; + _hasMinimum = false; + _hasMaximum = false; + _hasSum = false; + _hasTotalLength = false; + _totalLength = 0; + _valueCount = 0; + } + + void updateMinMax(T value) { + if (!_hasMinimum) { + _hasMinimum = _hasMaximum = true; + _minimum = _maximum = value; + } else if (compare(value, _minimum)) { + _minimum = value; + } else if (compare(_maximum, value)) { + _maximum = value; + } + } + + // sum is not merged here as we need to check overflow + void merge(const InternalStatisticsImpl& other) { + _hasNull = _hasNull || other._hasNull; + _valueCount += other._valueCount; + + if (other._hasMinimum) { + if (!_hasMinimum) { + _hasMinimum = _hasMaximum = true; + _minimum = other._minimum; + _maximum = other._maximum; + } else { + // all template types should support operator< + if (compare(_maximum, other._maximum)) { + _maximum = other._maximum; + } + if (compare(other._minimum, _minimum)) { + _minimum = other._minimum; + } + } + } + + _hasTotalLength = _hasTotalLength && other._hasTotalLength; + _totalLength += other._totalLength; + } + }; + + typedef InternalStatisticsImpl<char> InternalCharStatistics; + typedef InternalStatisticsImpl<char> InternalBooleanStatistics; + typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics; + typedef InternalStatisticsImpl<int32_t> InternalDateStatistics; + typedef InternalStatisticsImpl<double> InternalDoubleStatistics; + typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics; + typedef InternalStatisticsImpl<std::string> InternalStringStatistics; + + /** + * Mutable column statistics for use by the writer. + */ + class MutableColumnStatistics { + public: + virtual ~MutableColumnStatistics(); + + virtual void increase(uint64_t count) = 0; + + virtual void setNumberOfValues(uint64_t value) = 0; + + virtual void setHasNull(bool hasNull) = 0; + + virtual void merge(const MutableColumnStatistics& other) = 0; + + virtual void reset() = 0; + + virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0; + }; + +/** + * ColumnStatistics Implementation + */ + + class ColumnStatisticsImpl: public ColumnStatistics, + public MutableColumnStatistics { + private: + InternalCharStatistics _stats; + public: + ColumnStatisticsImpl() { reset(); } + ColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~ColumnStatisticsImpl() override; + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + void merge(const MutableColumnStatistics& other) override { + _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Column has " << getNumberOfValues() << " values" + << " and has null value: " << (hasNull() ? "yes" : "no") + << std::endl; + return buffer.str(); + } + }; + + class BinaryColumnStatisticsImpl: public BinaryColumnStatistics, + public MutableColumnStatistics { + private: + InternalCharStatistics _stats; + public: + BinaryColumnStatisticsImpl() { reset(); } + BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~BinaryColumnStatisticsImpl() override; + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + bool hasTotalLength() const override { + return _stats.hasTotalLength(); + } + + uint64_t getTotalLength() const override { + if(hasTotalLength()){ + return _stats.getTotalLength(); + }else{ + throw ParseError("Total length is not defined."); + } + } + + void setTotalLength(uint64_t length) { + _stats.setHasTotalLength(true); + _stats.setTotalLength(length); + } + + void update(size_t length) { + _stats.setTotalLength(_stats.getTotalLength() + length); + } + + void merge(const MutableColumnStatistics& other) override { + const BinaryColumnStatisticsImpl& binStats = + dynamic_cast<const BinaryColumnStatisticsImpl&>(other); + _stats.merge(binStats._stats); + } + + void reset() override { + _stats.reset(); + setTotalLength(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); + binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Binary" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasTotalLength()){ + buffer << "Total length: " << getTotalLength() << std::endl; + }else{ + buffer << "Total length: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class BooleanColumnStatisticsImpl: public BooleanColumnStatistics, + public MutableColumnStatistics { + private: + InternalBooleanStatistics _stats; + bool _hasCount; + uint64_t _trueCount; + + public: + BooleanColumnStatisticsImpl() { reset(); } + BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~BooleanColumnStatisticsImpl() override; + + bool hasCount() const override { + return _hasCount; + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + _hasCount = true; + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + uint64_t getFalseCount() const override { + if(hasCount()){ + return getNumberOfValues() - _trueCount; + }else{ + throw ParseError("False count is not defined."); + } + } + + uint64_t getTrueCount() const override { + if(hasCount()){ + return _trueCount; + }else{ + throw ParseError("True count is not defined."); + } + } + + void setTrueCount(uint64_t trueCount) { + _hasCount = true; + _trueCount = trueCount; + } + + void update(bool value, size_t repetitions) { + if (value) { + _trueCount += repetitions; + } + } + + void merge(const MutableColumnStatistics& other) override { + const BooleanColumnStatisticsImpl& boolStats = + dynamic_cast<const BooleanColumnStatisticsImpl&>(other); + _stats.merge(boolStats._stats); + _hasCount = _hasCount && boolStats._hasCount; + _trueCount += boolStats._trueCount; + } + + void reset() override { + _stats.reset(); + setTrueCount(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); + if (_hasCount) { + bucketStats->add_count(_trueCount); + } else { + bucketStats->clear_count(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Boolean" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasCount()){ + buffer << "(true: " << getTrueCount() << "; false: " + << getFalseCount() << ")" << std::endl; + } else { + buffer << "(true: not defined; false: not defined)" << std::endl; + buffer << "True and false counts are not defined" << std::endl; + } + return buffer.str(); + } + }; + + class DateColumnStatisticsImpl: public DateColumnStatistics, + public MutableColumnStatistics{ + private: + InternalDateStatistics _stats; + public: + DateColumnStatisticsImpl() { reset(); } + DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~DateColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int32_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int32_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int32_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int32_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + void update(int32_t value) { + _stats.updateMinMax(value); + } + + void merge(const MutableColumnStatistics& other) override { + const DateColumnStatisticsImpl& dateStats = + dynamic_cast<const DateColumnStatisticsImpl&>(other); + _stats.merge(dateStats._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DateStatistics* dateStatistics = + pbStats.mutable_datestatistics(); + if (_stats.hasMinimum()) { + dateStatistics->set_maximum(_stats.getMaximum()); + dateStatistics->set_minimum(_stats.getMinimum()); + } else { + dateStatistics->clear_minimum(); + dateStatistics->clear_maximum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Date" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class DecimalColumnStatisticsImpl: public DecimalColumnStatistics, + public MutableColumnStatistics { + private: + InternalDecimalStatistics _stats; + + public: + DecimalColumnStatisticsImpl() { reset(); } + DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~DecimalColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + Decimal getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + Decimal getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(Decimal minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(Decimal maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + Decimal getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(Decimal sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + + void update(const Decimal& value) { + _stats.updateMinMax(value); + + if (_stats.hasSum()) { + updateSum(value); + } + } + + void merge(const MutableColumnStatistics& other) override { + const DecimalColumnStatisticsImpl& decStats = + dynamic_cast<const DecimalColumnStatisticsImpl&>(other); + + _stats.merge(decStats._stats); + + _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); + if (_stats.hasSum()) { + updateSum(decStats.getSum()); + } + } + + void reset() override { + _stats.reset(); + setSum(Decimal()); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); + if (_stats.hasMinimum()) { + decStats->set_minimum(TString(_stats.getMinimum().toString())); + decStats->set_maximum(TString(_stats.getMaximum().toString())); + } else { + decStats->clear_minimum(); + decStats->clear_maximum(); + } + if (_stats.hasSum()) { + decStats->set_sum(TString(_stats.getSum().toString())); + } else { + decStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Decimal" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum().toString() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum().toString() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum().toString() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + + return buffer.str(); + } + + private: + void updateSum(Decimal value) { + if (_stats.hasSum()) { + bool overflow = false; + Decimal sum = _stats.getSum(); + if (sum.scale > value.scale) { + value.value = scaleUpInt128ByPowerOfTen(value.value, + sum.scale - value.scale, + overflow); + } else if (sum.scale < value.scale) { + sum.value = scaleUpInt128ByPowerOfTen(sum.value, + value.scale - sum.scale, + overflow); + sum.scale = value.scale; + } + + if (!overflow) { + bool wasPositive = sum.value >= 0; + sum.value += value.value; + if ((value.value >= 0) == wasPositive) { + _stats.setHasSum((sum.value >= 0) == wasPositive); + } + } else { + _stats.setHasSum(false); + } + + if (_stats.hasSum()) { + _stats.setSum(sum); + } + } + } + }; + + class DoubleColumnStatisticsImpl: public DoubleColumnStatistics, + public MutableColumnStatistics { + private: + InternalDoubleStatistics _stats; + public: + DoubleColumnStatisticsImpl() { reset(); } + DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~DoubleColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + double getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + double getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(double minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(double maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + double getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(double sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + + void update(double value) { + _stats.updateMinMax(value); + _stats.setSum(_stats.getSum() + value); + } + + void merge(const MutableColumnStatistics& other) override { + const DoubleColumnStatisticsImpl& doubleStats = + dynamic_cast<const DoubleColumnStatisticsImpl&>(other); + _stats.merge(doubleStats._stats); + + _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); + if (_stats.hasSum()) { + _stats.setSum(_stats.getSum() + doubleStats.getSum()); + } + } + + void reset() override { + _stats.reset(); + setSum(0.0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); + if (_stats.hasMinimum()) { + doubleStats->set_minimum(_stats.getMinimum()); + doubleStats->set_maximum(_stats.getMaximum()); + } else { + doubleStats->clear_minimum(); + doubleStats->clear_maximum(); + } + if (_stats.hasSum()) { + doubleStats->set_sum(_stats.getSum()); + } else { + doubleStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Double" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class IntegerColumnStatisticsImpl: public IntegerColumnStatistics, + public MutableColumnStatistics { + private: + InternalIntegerStatistics _stats; + public: + IntegerColumnStatisticsImpl() { reset(); } + IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats); + virtual ~IntegerColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasSum() const override { + return _stats.hasSum(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int64_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int64_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int64_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int64_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + int64_t getSum() const override { + if(hasSum()){ + return _stats.getSum(); + }else{ + throw ParseError("Sum is not defined."); + } + } + + void setSum(int64_t sum) { + _stats.setHasSum(true); + _stats.setSum(sum); + } + void update(int64_t value, int repetitions) { _stats.updateMinMax(value); - + if (_stats.hasSum()) { if (repetitions > 1) { _stats.setHasSum(multiplyExact(value, repetitions, &value)); @@ -981,498 +981,498 @@ namespace orc { } } - void merge(const MutableColumnStatistics& other) override { - const IntegerColumnStatisticsImpl& intStats = - dynamic_cast<const IntegerColumnStatisticsImpl&>(other); - - _stats.merge(intStats._stats); - - // update sum and check overflow - _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); - if (_stats.hasSum()) { + void merge(const MutableColumnStatistics& other) override { + const IntegerColumnStatisticsImpl& intStats = + dynamic_cast<const IntegerColumnStatisticsImpl&>(other); + + _stats.merge(intStats._stats); + + // update sum and check overflow + _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); + if (_stats.hasSum()) { int64_t value; _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value)); if (_stats.hasSum()) { _stats.setSum(value); - } - } - } - - void reset() override { - _stats.reset(); - setSum(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); - if (_stats.hasMinimum()) { - intStats->set_minimum(_stats.getMinimum()); - intStats->set_maximum(_stats.getMaximum()); - } else { - intStats->clear_minimum(); - intStats->clear_maximum(); - } - if (_stats.hasSum()) { - intStats->set_sum(_stats.getSum()); - } else { - intStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: Integer" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum: not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum: not defined" << std::endl; - } - - if(hasSum()){ - buffer << "Sum: " << getSum() << std::endl; - }else{ - buffer << "Sum: not defined" << std::endl; - } - return buffer.str(); - } - }; - - class StringColumnStatisticsImpl: public StringColumnStatistics, - public MutableColumnStatistics{ - private: - InternalStringStatistics _stats; - - public: - StringColumnStatisticsImpl() { - reset(); - } - StringColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~StringColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - bool hasTotalLength() const override { - return _stats.hasTotalLength(); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - + } + } + } + + void reset() override { + _stats.reset(); + setSum(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); + if (_stats.hasMinimum()) { + intStats->set_minimum(_stats.getMinimum()); + intStats->set_maximum(_stats.getMaximum()); + } else { + intStats->clear_minimum(); + intStats->clear_maximum(); + } + if (_stats.hasSum()) { + intStats->set_sum(_stats.getSum()); + } else { + intStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: Integer" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum: not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum: not defined" << std::endl; + } + + if(hasSum()){ + buffer << "Sum: " << getSum() << std::endl; + }else{ + buffer << "Sum: not defined" << std::endl; + } + return buffer.str(); + } + }; + + class StringColumnStatisticsImpl: public StringColumnStatistics, + public MutableColumnStatistics{ + private: + InternalStringStatistics _stats; + + public: + StringColumnStatisticsImpl() { + reset(); + } + StringColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~StringColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + bool hasTotalLength() const override { + return _stats.hasTotalLength(); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + const std::string & getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + const std::string & getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(std::string minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(std::string maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - uint64_t getTotalLength() const override { - if(hasTotalLength()){ - return _stats.getTotalLength(); - }else{ - throw ParseError("Total length is not defined."); - } - } - - void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); - } - - void update(const char* value, size_t length) { - if (value != nullptr) { - if (!_stats.hasMinimum()) { - std::string tempStr(value, value + length); - setMinimum(tempStr); - setMaximum(tempStr); - } else { - // update min - int minCmp = strncmp(_stats.getMinimum().c_str(), - value, - std::min(_stats.getMinimum().length(), length)); - if (minCmp > 0 || - (minCmp == 0 && length < _stats.getMinimum().length())) { - setMinimum(std::string(value, value + length)); - } - - // update max - int maxCmp = strncmp(_stats.getMaximum().c_str(), - value, - std::min(_stats.getMaximum().length(), length)); - if (maxCmp < 0 || - (maxCmp == 0 && length > _stats.getMaximum().length())) { - setMaximum(std::string(value, value + length)); - } - } - } - - _stats.setTotalLength(_stats.getTotalLength() + length); - } - - void update(std::string value) { - update(value.c_str(), value.length()); - } - - void merge(const MutableColumnStatistics& other) override { - const StringColumnStatisticsImpl& strStats = - dynamic_cast<const StringColumnStatisticsImpl&>(other); - _stats.merge(strStats._stats); - } - - void reset() override { - _stats.reset(); - setTotalLength(0); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); - if (_stats.hasMinimum()) { - strStats->set_minimum(TString(_stats.getMinimum())); - strStats->set_maximum(TString(_stats.getMaximum())); - } else { - strStats->clear_minimum(); - strStats->clear_maximum(); - } - if (_stats.hasTotalLength()) { - strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); - } else { - strStats->clear_sum(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - buffer << "Data type: String" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - buffer << "Minimum: " << getMinimum() << std::endl; - }else{ - buffer << "Minimum is not defined" << std::endl; - } - - if(hasMaximum()){ - buffer << "Maximum: " << getMaximum() << std::endl; - }else{ - buffer << "Maximum is not defined" << std::endl; - } - - if(hasTotalLength()){ - buffer << "Total length: " << getTotalLength() << std::endl; - }else{ - buffer << "Total length is not defined" << std::endl; - } - return buffer.str(); - } - }; - - class TimestampColumnStatisticsImpl: public TimestampColumnStatistics, - public MutableColumnStatistics { - private: - InternalIntegerStatistics _stats; - bool _hasLowerBound; - bool _hasUpperBound; - int64_t _lowerBound; - int64_t _upperBound; - - public: - TimestampColumnStatisticsImpl() { reset(); } - TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats, - const StatContext& statContext); - virtual ~TimestampColumnStatisticsImpl() override; - - bool hasMinimum() const override { - return _stats.hasMinimum(); - } - - bool hasMaximum() const override { - return _stats.hasMaximum(); - } - - uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); - } - - void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); - } - - void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - } - - bool hasNull() const override { - return _stats.hasNull(); - } - - void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); - } - - int64_t getMinimum() const override { - if(hasMinimum()){ - return _stats.getMinimum(); - }else{ - throw ParseError("Minimum is not defined."); - } - } - - int64_t getMaximum() const override { - if(hasMaximum()){ - return _stats.getMaximum(); - }else{ - throw ParseError("Maximum is not defined."); - } - } - - void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); - } - - void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); - } - - void update(int64_t value) { - _stats.updateMinMax(value); - } - - void merge(const MutableColumnStatistics& other) override { - const TimestampColumnStatisticsImpl& tsStats = - dynamic_cast<const TimestampColumnStatisticsImpl&>(other); - _stats.merge(tsStats._stats); - } - - void reset() override { - _stats.reset(); - } - - void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); - - proto::TimestampStatistics* tsStats = - pbStats.mutable_timestampstatistics(); - if (_stats.hasMinimum()) { - tsStats->set_minimumutc(_stats.getMinimum()); - tsStats->set_maximumutc(_stats.getMaximum()); - } else { - tsStats->clear_minimumutc(); - tsStats->clear_maximumutc(); - } - } - - std::string toString() const override { - std::ostringstream buffer; - struct tm tmValue; - char timeBuffer[20]; - time_t secs = 0; - - buffer << "Data type: Timestamp" << std::endl - << "Values: " << getNumberOfValues() << std::endl - << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; - if(hasMinimum()){ - secs = static_cast<time_t>(getMinimum() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "Minimum: " << timeBuffer << "." - << (getMinimum() % 1000) << std::endl; - }else{ - buffer << "Minimum is not defined" << std::endl; - } - - if(hasLowerBound()){ - secs = static_cast<time_t>(getLowerBound() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "LowerBound: " << timeBuffer << "." - << (getLowerBound() % 1000) << std::endl; - }else{ - buffer << "LowerBound is not defined" << std::endl; - } - - if(hasMaximum()){ - secs = static_cast<time_t>(getMaximum()/1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "Maximum: " << timeBuffer << "." - << (getMaximum() % 1000) << std::endl; - }else{ - buffer << "Maximum is not defined" << std::endl; - } - - if(hasUpperBound()){ - secs = static_cast<time_t>(getUpperBound() / 1000); - gmtime_r(&secs, &tmValue); - strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); - buffer << "UpperBound: " << timeBuffer << "." - << (getUpperBound() % 1000) << std::endl; - }else{ - buffer << "UpperBound is not defined" << std::endl; - } - - return buffer.str(); - } - - bool hasLowerBound() const override { - return _hasLowerBound; - } - - bool hasUpperBound() const override { - return _hasUpperBound; - } - - int64_t getLowerBound() const override { - if(hasLowerBound()){ - return _lowerBound; - }else{ - throw ParseError("LowerBound is not defined."); - } - } - - int64_t getUpperBound() const override { - if(hasUpperBound()){ - return _upperBound; - }else{ - throw ParseError("UpperBound is not defined."); - } - } - }; - - ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, - const StatContext& statContext); - - class StatisticsImpl: public Statistics { - private: - std::vector<ColumnStatistics*> colStats; - - // DELIBERATELY NOT IMPLEMENTED - StatisticsImpl(const StatisticsImpl&); - StatisticsImpl& operator=(const StatisticsImpl&); - - public: - StatisticsImpl(const proto::StripeStatistics& stripeStats, - const StatContext& statContext); - - StatisticsImpl(const proto::Footer& footer, const StatContext& statContext); - - virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId - ) const override { - return colStats[columnId]; - } - - virtual ~StatisticsImpl() override; - - uint32_t getNumberOfColumns() const override { - return static_cast<uint32_t>(colStats.size()); - } - }; - - class StripeStatisticsImpl: public StripeStatistics { - private: - std::unique_ptr<StatisticsImpl> columnStats; - std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > - rowIndexStats; - - // DELIBERATELY NOT IMPLEMENTED - StripeStatisticsImpl(const StripeStatisticsImpl&); - StripeStatisticsImpl& operator=(const StripeStatisticsImpl&); - - public: - StripeStatisticsImpl( - const proto::StripeStatistics& stripeStats, - std::vector<std::vector<proto::ColumnStatistics> >& indexStats, - const StatContext& statContext); - - virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId - ) const override { - return columnStats->getColumnStatistics(columnId); - } - - uint32_t getNumberOfColumns() const override { - return columnStats->getNumberOfColumns(); - } - - virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, - uint32_t rowIndex - ) const override { - // check id indices are valid - return rowIndexStats[columnId][rowIndex].get(); - } - - virtual ~StripeStatisticsImpl() override; - - uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { - return static_cast<uint32_t>(rowIndexStats[columnId].size()); - } - }; - - /** - * Create ColumnStatistics for writers - * @param type of column - * @return MutableColumnStatistics instances - */ - std::unique_ptr<MutableColumnStatistics> createColumnStatistics( - const Type& type); - -}// namespace - -#endif + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(std::string minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(std::string maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + uint64_t getTotalLength() const override { + if(hasTotalLength()){ + return _stats.getTotalLength(); + }else{ + throw ParseError("Total length is not defined."); + } + } + + void setTotalLength(uint64_t length) { + _stats.setHasTotalLength(true); + _stats.setTotalLength(length); + } + + void update(const char* value, size_t length) { + if (value != nullptr) { + if (!_stats.hasMinimum()) { + std::string tempStr(value, value + length); + setMinimum(tempStr); + setMaximum(tempStr); + } else { + // update min + int minCmp = strncmp(_stats.getMinimum().c_str(), + value, + std::min(_stats.getMinimum().length(), length)); + if (minCmp > 0 || + (minCmp == 0 && length < _stats.getMinimum().length())) { + setMinimum(std::string(value, value + length)); + } + + // update max + int maxCmp = strncmp(_stats.getMaximum().c_str(), + value, + std::min(_stats.getMaximum().length(), length)); + if (maxCmp < 0 || + (maxCmp == 0 && length > _stats.getMaximum().length())) { + setMaximum(std::string(value, value + length)); + } + } + } + + _stats.setTotalLength(_stats.getTotalLength() + length); + } + + void update(std::string value) { + update(value.c_str(), value.length()); + } + + void merge(const MutableColumnStatistics& other) override { + const StringColumnStatisticsImpl& strStats = + dynamic_cast<const StringColumnStatisticsImpl&>(other); + _stats.merge(strStats._stats); + } + + void reset() override { + _stats.reset(); + setTotalLength(0); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); + if (_stats.hasMinimum()) { + strStats->set_minimum(TString(_stats.getMinimum())); + strStats->set_maximum(TString(_stats.getMaximum())); + } else { + strStats->clear_minimum(); + strStats->clear_maximum(); + } + if (_stats.hasTotalLength()) { + strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); + } else { + strStats->clear_sum(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + buffer << "Data type: String" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + buffer << "Minimum: " << getMinimum() << std::endl; + }else{ + buffer << "Minimum is not defined" << std::endl; + } + + if(hasMaximum()){ + buffer << "Maximum: " << getMaximum() << std::endl; + }else{ + buffer << "Maximum is not defined" << std::endl; + } + + if(hasTotalLength()){ + buffer << "Total length: " << getTotalLength() << std::endl; + }else{ + buffer << "Total length is not defined" << std::endl; + } + return buffer.str(); + } + }; + + class TimestampColumnStatisticsImpl: public TimestampColumnStatistics, + public MutableColumnStatistics { + private: + InternalIntegerStatistics _stats; + bool _hasLowerBound; + bool _hasUpperBound; + int64_t _lowerBound; + int64_t _upperBound; + + public: + TimestampColumnStatisticsImpl() { reset(); } + TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats, + const StatContext& statContext); + virtual ~TimestampColumnStatisticsImpl() override; + + bool hasMinimum() const override { + return _stats.hasMinimum(); + } + + bool hasMaximum() const override { + return _stats.hasMaximum(); + } + + uint64_t getNumberOfValues() const override { + return _stats.getNumberOfValues(); + } + + void setNumberOfValues(uint64_t value) override { + _stats.setNumberOfValues(value); + } + + void increase(uint64_t count) override { + _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + } + + bool hasNull() const override { + return _stats.hasNull(); + } + + void setHasNull(bool hasNull) override { + _stats.setHasNull(hasNull); + } + + int64_t getMinimum() const override { + if(hasMinimum()){ + return _stats.getMinimum(); + }else{ + throw ParseError("Minimum is not defined."); + } + } + + int64_t getMaximum() const override { + if(hasMaximum()){ + return _stats.getMaximum(); + }else{ + throw ParseError("Maximum is not defined."); + } + } + + void setMinimum(int64_t minimum) { + _stats.setHasMinimum(true); + _stats.setMinimum(minimum); + } + + void setMaximum(int64_t maximum) { + _stats.setHasMaximum(true); + _stats.setMaximum(maximum); + } + + void update(int64_t value) { + _stats.updateMinMax(value); + } + + void merge(const MutableColumnStatistics& other) override { + const TimestampColumnStatisticsImpl& tsStats = + dynamic_cast<const TimestampColumnStatisticsImpl&>(other); + _stats.merge(tsStats._stats); + } + + void reset() override { + _stats.reset(); + } + + void toProtoBuf(proto::ColumnStatistics& pbStats) const override { + pbStats.set_hasnull(_stats.hasNull()); + pbStats.set_numberofvalues(_stats.getNumberOfValues()); + + proto::TimestampStatistics* tsStats = + pbStats.mutable_timestampstatistics(); + if (_stats.hasMinimum()) { + tsStats->set_minimumutc(_stats.getMinimum()); + tsStats->set_maximumutc(_stats.getMaximum()); + } else { + tsStats->clear_minimumutc(); + tsStats->clear_maximumutc(); + } + } + + std::string toString() const override { + std::ostringstream buffer; + struct tm tmValue; + char timeBuffer[20]; + time_t secs = 0; + + buffer << "Data type: Timestamp" << std::endl + << "Values: " << getNumberOfValues() << std::endl + << "Has null: " << (hasNull() ? "yes" : "no") << std::endl; + if(hasMinimum()){ + secs = static_cast<time_t>(getMinimum() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "Minimum: " << timeBuffer << "." + << (getMinimum() % 1000) << std::endl; + }else{ + buffer << "Minimum is not defined" << std::endl; + } + + if(hasLowerBound()){ + secs = static_cast<time_t>(getLowerBound() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "LowerBound: " << timeBuffer << "." + << (getLowerBound() % 1000) << std::endl; + }else{ + buffer << "LowerBound is not defined" << std::endl; + } + + if(hasMaximum()){ + secs = static_cast<time_t>(getMaximum()/1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "Maximum: " << timeBuffer << "." + << (getMaximum() % 1000) << std::endl; + }else{ + buffer << "Maximum is not defined" << std::endl; + } + + if(hasUpperBound()){ + secs = static_cast<time_t>(getUpperBound() / 1000); + gmtime_r(&secs, &tmValue); + strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); + buffer << "UpperBound: " << timeBuffer << "." + << (getUpperBound() % 1000) << std::endl; + }else{ + buffer << "UpperBound is not defined" << std::endl; + } + + return buffer.str(); + } + + bool hasLowerBound() const override { + return _hasLowerBound; + } + + bool hasUpperBound() const override { + return _hasUpperBound; + } + + int64_t getLowerBound() const override { + if(hasLowerBound()){ + return _lowerBound; + }else{ + throw ParseError("LowerBound is not defined."); + } + } + + int64_t getUpperBound() const override { + if(hasUpperBound()){ + return _upperBound; + }else{ + throw ParseError("UpperBound is not defined."); + } + } + }; + + ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, + const StatContext& statContext); + + class StatisticsImpl: public Statistics { + private: + std::vector<ColumnStatistics*> colStats; + + // DELIBERATELY NOT IMPLEMENTED + StatisticsImpl(const StatisticsImpl&); + StatisticsImpl& operator=(const StatisticsImpl&); + + public: + StatisticsImpl(const proto::StripeStatistics& stripeStats, + const StatContext& statContext); + + StatisticsImpl(const proto::Footer& footer, const StatContext& statContext); + + virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId + ) const override { + return colStats[columnId]; + } + + virtual ~StatisticsImpl() override; + + uint32_t getNumberOfColumns() const override { + return static_cast<uint32_t>(colStats.size()); + } + }; + + class StripeStatisticsImpl: public StripeStatistics { + private: + std::unique_ptr<StatisticsImpl> columnStats; + std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > + rowIndexStats; + + // DELIBERATELY NOT IMPLEMENTED + StripeStatisticsImpl(const StripeStatisticsImpl&); + StripeStatisticsImpl& operator=(const StripeStatisticsImpl&); + + public: + StripeStatisticsImpl( + const proto::StripeStatistics& stripeStats, + std::vector<std::vector<proto::ColumnStatistics> >& indexStats, + const StatContext& statContext); + + virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId + ) const override { + return columnStats->getColumnStatistics(columnId); + } + + uint32_t getNumberOfColumns() const override { + return columnStats->getNumberOfColumns(); + } + + virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, + uint32_t rowIndex + ) const override { + // check id indices are valid + return rowIndexStats[columnId][rowIndex].get(); + } + + virtual ~StripeStatisticsImpl() override; + + uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { + return static_cast<uint32_t>(rowIndexStats[columnId].size()); + } + }; + + /** + * Create ColumnStatistics for writers + * @param type of column + * @return MutableColumnStatistics instances + */ + std::unique_ptr<MutableColumnStatistics> createColumnStatistics( + const Type& type); + +}// namespace + +#endif |