aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/orc/c++/src/Statistics.hh
diff options
context:
space:
mode:
author iaz1607 <iaz1607@yandex-team.ru> 2022-02-10 16:45:37 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:37 +0300
commit e5437feb4ac2d2dc044e1090b9312dde5ef197e0 (patch)
tree f5a238c69dd20a1fa2092127a31b8aff25020f7d /contrib/libs/apache/orc/c++/src/Statistics.hh
parent f4945d0a44b8770f0801de3056aa41639b0b7bd2 (diff)
download ydb-e5437feb4ac2d2dc044e1090b9312dde5ef197e0.tar.gz
Restoring authorship annotation for <iaz1607@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/apache/orc/c++/src/Statistics.hh')
-rw-r--r-- contrib/libs/apache/orc/c++/src/Statistics.hh 2906
1 files changed, 1453 insertions, 1453 deletions
diff --git a/contrib/libs/apache/orc/c++/src/Statistics.hh b/contrib/libs/apache/orc/c++/src/Statistics.hh
index ee9db23f86..849019d8d7 100644
--- a/contrib/libs/apache/orc/c++/src/Statistics.hh
+++ b/contrib/libs/apache/orc/c++/src/Statistics.hh
@@ -1,971 +1,971 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ORC_STATISTICS_IMPL_HH
-#define ORC_STATISTICS_IMPL_HH
-
-#include "orc/Common.hh"
-#include "orc/Int128.hh"
-#include "orc/OrcFile.hh"
-#include "orc/Reader.hh"
-
-#include "Timezone.hh"
-#include "TypeImpl.hh"
-
-namespace orc {
-
-/**
- * StatContext contains fields required to compute statistics
- */
-
- struct StatContext {
- const bool correctStats;
- const Timezone* const writerTimezone;
- StatContext() : correctStats(false), writerTimezone(nullptr) {}
- StatContext(bool cStat, const Timezone* const timezone = nullptr) :
- correctStats(cStat), writerTimezone(timezone) {}
- };
-
-/**
- * Internal Statistics Implementation
- */
-
- template <typename T>
- class InternalStatisticsImpl {
- private:
- bool _hasNull;
- bool _hasMinimum;
- bool _hasMaximum;
- bool _hasSum;
- bool _hasTotalLength;
- uint64_t _totalLength;
- uint64_t _valueCount;
- T _minimum;
- T _maximum;
- T _sum;
- public:
- InternalStatisticsImpl() {
- _hasNull = false;
- _hasMinimum = false;
- _hasMaximum = false;
- _hasSum = false;
- _hasTotalLength = false;
- _totalLength = 0;
- _valueCount = 0;
- }
-
- ~InternalStatisticsImpl() {}
-
- // GET / SET _totalLength
- bool hasTotalLength() const { return _hasTotalLength; }
-
- void setHasTotalLength(bool hasTotalLength) {
- _hasTotalLength = hasTotalLength;
- }
-
- uint64_t getTotalLength() const { return _totalLength; }
-
- void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; }
-
- // GET / SET _sum
- bool hasSum() const { return _hasSum; }
-
- void setHasSum(bool hasSum) { _hasSum = hasSum; }
-
- T getSum() const { return _sum; }
-
- void setSum(T sum) { _sum = sum; }
-
- // GET / SET _maximum
- bool hasMaximum() const { return _hasMaximum; }
-
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_STATISTICS_IMPL_HH
+#define ORC_STATISTICS_IMPL_HH
+
+#include "orc/Common.hh"
+#include "orc/Int128.hh"
+#include "orc/OrcFile.hh"
+#include "orc/Reader.hh"
+
+#include "Timezone.hh"
+#include "TypeImpl.hh"
+
+namespace orc {
+
+/**
+ * StatContext contains fields required to compute statistics
+ */
+
+ struct StatContext {
+ const bool correctStats;
+ const Timezone* const writerTimezone;
+ StatContext() : correctStats(false), writerTimezone(nullptr) {}
+ StatContext(bool cStat, const Timezone* const timezone = nullptr) :
+ correctStats(cStat), writerTimezone(timezone) {}
+ };
+
+/**
+ * Internal Statistics Implementation
+ */
+
+ template <typename T>
+ class InternalStatisticsImpl {
+ private:
+ bool _hasNull;
+ bool _hasMinimum;
+ bool _hasMaximum;
+ bool _hasSum;
+ bool _hasTotalLength;
+ uint64_t _totalLength;
+ uint64_t _valueCount;
+ T _minimum;
+ T _maximum;
+ T _sum;
+ public:
+ InternalStatisticsImpl() {
+ _hasNull = false;
+ _hasMinimum = false;
+ _hasMaximum = false;
+ _hasSum = false;
+ _hasTotalLength = false;
+ _totalLength = 0;
+ _valueCount = 0;
+ }
+
+ ~InternalStatisticsImpl() {}
+
+ // GET / SET _totalLength
+ bool hasTotalLength() const { return _hasTotalLength; }
+
+ void setHasTotalLength(bool hasTotalLength) {
+ _hasTotalLength = hasTotalLength;
+ }
+
+ uint64_t getTotalLength() const { return _totalLength; }
+
+ void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; }
+
+ // GET / SET _sum
+ bool hasSum() const { return _hasSum; }
+
+ void setHasSum(bool hasSum) { _hasSum = hasSum; }
+
+ T getSum() const { return _sum; }
+
+ void setSum(T sum) { _sum = sum; }
+
+ // GET / SET _maximum
+ bool hasMaximum() const { return _hasMaximum; }
+
const T & getMaximum() const { return _maximum; }
-
- void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; }
-
- void setMaximum(T max) { _maximum = max; }
-
- // GET / SET _minimum
- bool hasMinimum() const { return _hasMinimum; }
-
- void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; }
-
+
+ void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; }
+
+ void setMaximum(T max) { _maximum = max; }
+
+ // GET / SET _minimum
+ bool hasMinimum() const { return _hasMinimum; }
+
+ void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; }
+
const T & getMinimum() const { return _minimum; }
-
- void setMinimum(T min) { _minimum = min; }
-
- // GET / SET _valueCount
- uint64_t getNumberOfValues() const { return _valueCount; }
-
- void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; }
-
- // GET / SET _hasNullValue
- bool hasNull() const { return _hasNull; }
-
- void setHasNull(bool hasNull) { _hasNull = hasNull; }
-
- void reset() {
- _hasNull = false;
- _hasMinimum = false;
- _hasMaximum = false;
- _hasSum = false;
- _hasTotalLength = false;
- _totalLength = 0;
- _valueCount = 0;
- }
-
- void updateMinMax(T value) {
- if (!_hasMinimum) {
- _hasMinimum = _hasMaximum = true;
- _minimum = _maximum = value;
- } else if (compare(value, _minimum)) {
- _minimum = value;
- } else if (compare(_maximum, value)) {
- _maximum = value;
- }
- }
-
- // sum is not merged here as we need to check overflow
- void merge(const InternalStatisticsImpl& other) {
- _hasNull = _hasNull || other._hasNull;
- _valueCount += other._valueCount;
-
- if (other._hasMinimum) {
- if (!_hasMinimum) {
- _hasMinimum = _hasMaximum = true;
- _minimum = other._minimum;
- _maximum = other._maximum;
- } else {
- // all template types should support operator<
- if (compare(_maximum, other._maximum)) {
- _maximum = other._maximum;
- }
- if (compare(other._minimum, _minimum)) {
- _minimum = other._minimum;
- }
- }
- }
-
- _hasTotalLength = _hasTotalLength && other._hasTotalLength;
- _totalLength += other._totalLength;
- }
- };
-
- typedef InternalStatisticsImpl<char> InternalCharStatistics;
- typedef InternalStatisticsImpl<char> InternalBooleanStatistics;
- typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics;
- typedef InternalStatisticsImpl<int32_t> InternalDateStatistics;
- typedef InternalStatisticsImpl<double> InternalDoubleStatistics;
- typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics;
- typedef InternalStatisticsImpl<std::string> InternalStringStatistics;
-
- /**
- * Mutable column statistics for use by the writer.
- */
- class MutableColumnStatistics {
- public:
- virtual ~MutableColumnStatistics();
-
- virtual void increase(uint64_t count) = 0;
-
- virtual void setNumberOfValues(uint64_t value) = 0;
-
- virtual void setHasNull(bool hasNull) = 0;
-
- virtual void merge(const MutableColumnStatistics& other) = 0;
-
- virtual void reset() = 0;
-
- virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0;
- };
-
-/**
- * ColumnStatistics Implementation
- */
-
- class ColumnStatisticsImpl: public ColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalCharStatistics _stats;
- public:
- ColumnStatisticsImpl() { reset(); }
- ColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~ColumnStatisticsImpl() override;
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- void merge(const MutableColumnStatistics& other) override {
- _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats);
- }
-
- void reset() override {
- _stats.reset();
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Column has " << getNumberOfValues() << " values"
- << " and has null value: " << (hasNull() ? "yes" : "no")
- << std::endl;
- return buffer.str();
- }
- };
-
- class BinaryColumnStatisticsImpl: public BinaryColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalCharStatistics _stats;
- public:
- BinaryColumnStatisticsImpl() { reset(); }
- BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~BinaryColumnStatisticsImpl() override;
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- bool hasTotalLength() const override {
- return _stats.hasTotalLength();
- }
-
- uint64_t getTotalLength() const override {
- if(hasTotalLength()){
- return _stats.getTotalLength();
- }else{
- throw ParseError("Total length is not defined.");
- }
- }
-
- void setTotalLength(uint64_t length) {
- _stats.setHasTotalLength(true);
- _stats.setTotalLength(length);
- }
-
- void update(size_t length) {
- _stats.setTotalLength(_stats.getTotalLength() + length);
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const BinaryColumnStatisticsImpl& binStats =
- dynamic_cast<const BinaryColumnStatisticsImpl&>(other);
- _stats.merge(binStats._stats);
- }
-
- void reset() override {
- _stats.reset();
- setTotalLength(0);
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics();
- binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Binary" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasTotalLength()){
- buffer << "Total length: " << getTotalLength() << std::endl;
- }else{
- buffer << "Total length: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class BooleanColumnStatisticsImpl: public BooleanColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalBooleanStatistics _stats;
- bool _hasCount;
- uint64_t _trueCount;
-
- public:
- BooleanColumnStatisticsImpl() { reset(); }
- BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~BooleanColumnStatisticsImpl() override;
-
- bool hasCount() const override {
- return _hasCount;
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- _hasCount = true;
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- uint64_t getFalseCount() const override {
- if(hasCount()){
- return getNumberOfValues() - _trueCount;
- }else{
- throw ParseError("False count is not defined.");
- }
- }
-
- uint64_t getTrueCount() const override {
- if(hasCount()){
- return _trueCount;
- }else{
- throw ParseError("True count is not defined.");
- }
- }
-
- void setTrueCount(uint64_t trueCount) {
- _hasCount = true;
- _trueCount = trueCount;
- }
-
- void update(bool value, size_t repetitions) {
- if (value) {
- _trueCount += repetitions;
- }
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const BooleanColumnStatisticsImpl& boolStats =
- dynamic_cast<const BooleanColumnStatisticsImpl&>(other);
- _stats.merge(boolStats._stats);
- _hasCount = _hasCount && boolStats._hasCount;
- _trueCount += boolStats._trueCount;
- }
-
- void reset() override {
- _stats.reset();
- setTrueCount(0);
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics();
- if (_hasCount) {
- bucketStats->add_count(_trueCount);
- } else {
- bucketStats->clear_count();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Boolean" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasCount()){
- buffer << "(true: " << getTrueCount() << "; false: "
- << getFalseCount() << ")" << std::endl;
- } else {
- buffer << "(true: not defined; false: not defined)" << std::endl;
- buffer << "True and false counts are not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class DateColumnStatisticsImpl: public DateColumnStatistics,
- public MutableColumnStatistics{
- private:
- InternalDateStatistics _stats;
- public:
- DateColumnStatisticsImpl() { reset(); }
- DateColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~DateColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- int32_t getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
- int32_t getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(int32_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(int32_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- void update(int32_t value) {
- _stats.updateMinMax(value);
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const DateColumnStatisticsImpl& dateStats =
- dynamic_cast<const DateColumnStatisticsImpl&>(other);
- _stats.merge(dateStats._stats);
- }
-
- void reset() override {
- _stats.reset();
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::DateStatistics* dateStatistics =
- pbStats.mutable_datestatistics();
- if (_stats.hasMinimum()) {
- dateStatistics->set_maximum(_stats.getMaximum());
- dateStatistics->set_minimum(_stats.getMinimum());
- } else {
- dateStatistics->clear_minimum();
- dateStatistics->clear_maximum();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Date" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- buffer << "Minimum: " << getMinimum() << std::endl;
- }else{
- buffer << "Minimum: not defined" << std::endl;
- }
-
- if(hasMaximum()){
- buffer << "Maximum: " << getMaximum() << std::endl;
- }else{
- buffer << "Maximum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class DecimalColumnStatisticsImpl: public DecimalColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalDecimalStatistics _stats;
-
- public:
- DecimalColumnStatisticsImpl() { reset(); }
- DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~DecimalColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- bool hasSum() const override {
- return _stats.hasSum();
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- Decimal getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
- Decimal getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(Decimal minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(Decimal maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- Decimal getSum() const override {
- if(hasSum()){
- return _stats.getSum();
- }else{
- throw ParseError("Sum is not defined.");
- }
- }
-
- void setSum(Decimal sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
-
- void update(const Decimal& value) {
- _stats.updateMinMax(value);
-
- if (_stats.hasSum()) {
- updateSum(value);
- }
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const DecimalColumnStatisticsImpl& decStats =
- dynamic_cast<const DecimalColumnStatisticsImpl&>(other);
-
- _stats.merge(decStats._stats);
-
- _stats.setHasSum(_stats.hasSum() && decStats.hasSum());
- if (_stats.hasSum()) {
- updateSum(decStats.getSum());
- }
- }
-
- void reset() override {
- _stats.reset();
- setSum(Decimal());
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
- if (_stats.hasMinimum()) {
- decStats->set_minimum(TString(_stats.getMinimum().toString()));
- decStats->set_maximum(TString(_stats.getMaximum().toString()));
- } else {
- decStats->clear_minimum();
- decStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- decStats->set_sum(TString(_stats.getSum().toString()));
- } else {
- decStats->clear_sum();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Decimal" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- buffer << "Minimum: " << getMinimum().toString() << std::endl;
- }else{
- buffer << "Minimum: not defined" << std::endl;
- }
-
- if(hasMaximum()){
- buffer << "Maximum: " << getMaximum().toString() << std::endl;
- }else{
- buffer << "Maximum: not defined" << std::endl;
- }
-
- if(hasSum()){
- buffer << "Sum: " << getSum().toString() << std::endl;
- }else{
- buffer << "Sum: not defined" << std::endl;
- }
-
- return buffer.str();
- }
-
- private:
- void updateSum(Decimal value) {
- if (_stats.hasSum()) {
- bool overflow = false;
- Decimal sum = _stats.getSum();
- if (sum.scale > value.scale) {
- value.value = scaleUpInt128ByPowerOfTen(value.value,
- sum.scale - value.scale,
- overflow);
- } else if (sum.scale < value.scale) {
- sum.value = scaleUpInt128ByPowerOfTen(sum.value,
- value.scale - sum.scale,
- overflow);
- sum.scale = value.scale;
- }
-
- if (!overflow) {
- bool wasPositive = sum.value >= 0;
- sum.value += value.value;
- if ((value.value >= 0) == wasPositive) {
- _stats.setHasSum((sum.value >= 0) == wasPositive);
- }
- } else {
- _stats.setHasSum(false);
- }
-
- if (_stats.hasSum()) {
- _stats.setSum(sum);
- }
- }
- }
- };
-
- class DoubleColumnStatisticsImpl: public DoubleColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalDoubleStatistics _stats;
- public:
- DoubleColumnStatisticsImpl() { reset(); }
- DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~DoubleColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- bool hasSum() const override {
- return _stats.hasSum();
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- double getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
- double getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(double minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(double maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- double getSum() const override {
- if(hasSum()){
- return _stats.getSum();
- }else{
- throw ParseError("Sum is not defined.");
- }
- }
-
- void setSum(double sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
-
- void update(double value) {
- _stats.updateMinMax(value);
- _stats.setSum(_stats.getSum() + value);
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const DoubleColumnStatisticsImpl& doubleStats =
- dynamic_cast<const DoubleColumnStatisticsImpl&>(other);
- _stats.merge(doubleStats._stats);
-
- _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum());
- if (_stats.hasSum()) {
- _stats.setSum(_stats.getSum() + doubleStats.getSum());
- }
- }
-
- void reset() override {
- _stats.reset();
- setSum(0.0);
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics();
- if (_stats.hasMinimum()) {
- doubleStats->set_minimum(_stats.getMinimum());
- doubleStats->set_maximum(_stats.getMaximum());
- } else {
- doubleStats->clear_minimum();
- doubleStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- doubleStats->set_sum(_stats.getSum());
- } else {
- doubleStats->clear_sum();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Double" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- buffer << "Minimum: " << getMinimum() << std::endl;
- }else{
- buffer << "Minimum: not defined" << std::endl;
- }
-
- if(hasMaximum()){
- buffer << "Maximum: " << getMaximum() << std::endl;
- }else{
- buffer << "Maximum: not defined" << std::endl;
- }
-
- if(hasSum()){
- buffer << "Sum: " << getSum() << std::endl;
- }else{
- buffer << "Sum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class IntegerColumnStatisticsImpl: public IntegerColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalIntegerStatistics _stats;
- public:
- IntegerColumnStatisticsImpl() { reset(); }
- IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~IntegerColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- bool hasSum() const override {
- return _stats.hasSum();
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- int64_t getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
- int64_t getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(int64_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(int64_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- int64_t getSum() const override {
- if(hasSum()){
- return _stats.getSum();
- }else{
- throw ParseError("Sum is not defined.");
- }
- }
-
- void setSum(int64_t sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
-
+
+ void setMinimum(T min) { _minimum = min; }
+
+ // GET / SET _valueCount
+ uint64_t getNumberOfValues() const { return _valueCount; }
+
+ void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; }
+
+ // GET / SET _hasNullValue
+ bool hasNull() const { return _hasNull; }
+
+ void setHasNull(bool hasNull) { _hasNull = hasNull; }
+
+ void reset() {
+ _hasNull = false;
+ _hasMinimum = false;
+ _hasMaximum = false;
+ _hasSum = false;
+ _hasTotalLength = false;
+ _totalLength = 0;
+ _valueCount = 0;
+ }
+
+ void updateMinMax(T value) {
+ if (!_hasMinimum) {
+ _hasMinimum = _hasMaximum = true;
+ _minimum = _maximum = value;
+ } else if (compare(value, _minimum)) {
+ _minimum = value;
+ } else if (compare(_maximum, value)) {
+ _maximum = value;
+ }
+ }
+
+ // sum is not merged here as we need to check overflow
+ void merge(const InternalStatisticsImpl& other) {
+ _hasNull = _hasNull || other._hasNull;
+ _valueCount += other._valueCount;
+
+ if (other._hasMinimum) {
+ if (!_hasMinimum) {
+ _hasMinimum = _hasMaximum = true;
+ _minimum = other._minimum;
+ _maximum = other._maximum;
+ } else {
+ // all template types should support operator<
+ if (compare(_maximum, other._maximum)) {
+ _maximum = other._maximum;
+ }
+ if (compare(other._minimum, _minimum)) {
+ _minimum = other._minimum;
+ }
+ }
+ }
+
+ _hasTotalLength = _hasTotalLength && other._hasTotalLength;
+ _totalLength += other._totalLength;
+ }
+ };
+
+ typedef InternalStatisticsImpl<char> InternalCharStatistics;
+ typedef InternalStatisticsImpl<char> InternalBooleanStatistics;
+ typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics;
+ typedef InternalStatisticsImpl<int32_t> InternalDateStatistics;
+ typedef InternalStatisticsImpl<double> InternalDoubleStatistics;
+ typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics;
+ typedef InternalStatisticsImpl<std::string> InternalStringStatistics;
+
+ /**
+ * Mutable column statistics for use by the writer.
+ */
+ class MutableColumnStatistics {
+ public:
+ virtual ~MutableColumnStatistics();
+
+ virtual void increase(uint64_t count) = 0;
+
+ virtual void setNumberOfValues(uint64_t value) = 0;
+
+ virtual void setHasNull(bool hasNull) = 0;
+
+ virtual void merge(const MutableColumnStatistics& other) = 0;
+
+ virtual void reset() = 0;
+
+ virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0;
+ };
+
+/**
+ * ColumnStatistics Implementation
+ */
+
+ class ColumnStatisticsImpl: public ColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalCharStatistics _stats;
+ public:
+ ColumnStatisticsImpl() { reset(); }
+ ColumnStatisticsImpl(const proto::ColumnStatistics& stats);
+ virtual ~ColumnStatisticsImpl() override;
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats);
+ }
+
+ void reset() override {
+ _stats.reset();
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Column has " << getNumberOfValues() << " values"
+ << " and has null value: " << (hasNull() ? "yes" : "no")
+ << std::endl;
+ return buffer.str();
+ }
+ };
+
+ class BinaryColumnStatisticsImpl: public BinaryColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalCharStatistics _stats;
+ public:
+ BinaryColumnStatisticsImpl() { reset(); }
+ BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+ const StatContext& statContext);
+ virtual ~BinaryColumnStatisticsImpl() override;
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ bool hasTotalLength() const override {
+ return _stats.hasTotalLength();
+ }
+
+ uint64_t getTotalLength() const override {
+ if(hasTotalLength()){
+ return _stats.getTotalLength();
+ }else{
+ throw ParseError("Total length is not defined.");
+ }
+ }
+
+ void setTotalLength(uint64_t length) {
+ _stats.setHasTotalLength(true);
+ _stats.setTotalLength(length);
+ }
+
+ void update(size_t length) {
+ _stats.setTotalLength(_stats.getTotalLength() + length);
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ const BinaryColumnStatisticsImpl& binStats =
+ dynamic_cast<const BinaryColumnStatisticsImpl&>(other);
+ _stats.merge(binStats._stats);
+ }
+
+ void reset() override {
+ _stats.reset();
+ setTotalLength(0);
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics();
+ binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Binary" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasTotalLength()){
+ buffer << "Total length: " << getTotalLength() << std::endl;
+ }else{
+ buffer << "Total length: not defined" << std::endl;
+ }
+ return buffer.str();
+ }
+ };
+
+ class BooleanColumnStatisticsImpl: public BooleanColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalBooleanStatistics _stats;
+ bool _hasCount;
+ uint64_t _trueCount;
+
+ public:
+ BooleanColumnStatisticsImpl() { reset(); }
+ BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+ const StatContext& statContext);
+ virtual ~BooleanColumnStatisticsImpl() override;
+
+ bool hasCount() const override {
+ return _hasCount;
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ _hasCount = true;
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ uint64_t getFalseCount() const override {
+ if(hasCount()){
+ return getNumberOfValues() - _trueCount;
+ }else{
+ throw ParseError("False count is not defined.");
+ }
+ }
+
+ uint64_t getTrueCount() const override {
+ if(hasCount()){
+ return _trueCount;
+ }else{
+ throw ParseError("True count is not defined.");
+ }
+ }
+
+ void setTrueCount(uint64_t trueCount) {
+ _hasCount = true;
+ _trueCount = trueCount;
+ }
+
+ void update(bool value, size_t repetitions) {
+ if (value) {
+ _trueCount += repetitions;
+ }
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ const BooleanColumnStatisticsImpl& boolStats =
+ dynamic_cast<const BooleanColumnStatisticsImpl&>(other);
+ _stats.merge(boolStats._stats);
+ _hasCount = _hasCount && boolStats._hasCount;
+ _trueCount += boolStats._trueCount;
+ }
+
+ void reset() override {
+ _stats.reset();
+ setTrueCount(0);
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics();
+ if (_hasCount) {
+ bucketStats->add_count(_trueCount);
+ } else {
+ bucketStats->clear_count();
+ }
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Boolean" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasCount()){
+ buffer << "(true: " << getTrueCount() << "; false: "
+ << getFalseCount() << ")" << std::endl;
+ } else {
+ buffer << "(true: not defined; false: not defined)" << std::endl;
+ buffer << "True and false counts are not defined" << std::endl;
+ }
+ return buffer.str();
+ }
+ };
+
+ class DateColumnStatisticsImpl: public DateColumnStatistics,
+ public MutableColumnStatistics{
+ private:
+ InternalDateStatistics _stats;
+ public:
+ DateColumnStatisticsImpl() { reset(); }
+ DateColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+ const StatContext& statContext);
+ virtual ~DateColumnStatisticsImpl() override;
+
+ bool hasMinimum() const override {
+ return _stats.hasMinimum();
+ }
+
+ bool hasMaximum() const override {
+ return _stats.hasMaximum();
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ int32_t getMinimum() const override {
+ if(hasMinimum()){
+ return _stats.getMinimum();
+ }else{
+ throw ParseError("Minimum is not defined.");
+ }
+ }
+
+ int32_t getMaximum() const override {
+ if(hasMaximum()){
+ return _stats.getMaximum();
+ }else{
+ throw ParseError("Maximum is not defined.");
+ }
+ }
+
+ void setMinimum(int32_t minimum) {
+ _stats.setHasMinimum(true);
+ _stats.setMinimum(minimum);
+ }
+
+ void setMaximum(int32_t maximum) {
+ _stats.setHasMaximum(true);
+ _stats.setMaximum(maximum);
+ }
+
+ void update(int32_t value) {
+ _stats.updateMinMax(value);
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ const DateColumnStatisticsImpl& dateStats =
+ dynamic_cast<const DateColumnStatisticsImpl&>(other);
+ _stats.merge(dateStats._stats);
+ }
+
+ void reset() override {
+ _stats.reset();
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::DateStatistics* dateStatistics =
+ pbStats.mutable_datestatistics();
+ if (_stats.hasMinimum()) {
+ dateStatistics->set_maximum(_stats.getMaximum());
+ dateStatistics->set_minimum(_stats.getMinimum());
+ } else {
+ dateStatistics->clear_minimum();
+ dateStatistics->clear_maximum();
+ }
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Date" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasMinimum()){
+ buffer << "Minimum: " << getMinimum() << std::endl;
+ }else{
+ buffer << "Minimum: not defined" << std::endl;
+ }
+
+ if(hasMaximum()){
+ buffer << "Maximum: " << getMaximum() << std::endl;
+ }else{
+ buffer << "Maximum: not defined" << std::endl;
+ }
+ return buffer.str();
+ }
+ };
+
+ class DecimalColumnStatisticsImpl: public DecimalColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalDecimalStatistics _stats;
+
+ public:
+ DecimalColumnStatisticsImpl() { reset(); }
+ DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+ const StatContext& statContext);
+ virtual ~DecimalColumnStatisticsImpl() override;
+
+ bool hasMinimum() const override {
+ return _stats.hasMinimum();
+ }
+
+ bool hasMaximum() const override {
+ return _stats.hasMaximum();
+ }
+
+ bool hasSum() const override {
+ return _stats.hasSum();
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ Decimal getMinimum() const override {
+ if(hasMinimum()){
+ return _stats.getMinimum();
+ }else{
+ throw ParseError("Minimum is not defined.");
+ }
+ }
+
+ Decimal getMaximum() const override {
+ if(hasMaximum()){
+ return _stats.getMaximum();
+ }else{
+ throw ParseError("Maximum is not defined.");
+ }
+ }
+
+ void setMinimum(Decimal minimum) {
+ _stats.setHasMinimum(true);
+ _stats.setMinimum(minimum);
+ }
+
+ void setMaximum(Decimal maximum) {
+ _stats.setHasMaximum(true);
+ _stats.setMaximum(maximum);
+ }
+
+ Decimal getSum() const override {
+ if(hasSum()){
+ return _stats.getSum();
+ }else{
+ throw ParseError("Sum is not defined.");
+ }
+ }
+
+ void setSum(Decimal sum) {
+ _stats.setHasSum(true);
+ _stats.setSum(sum);
+ }
+
+ void update(const Decimal& value) {
+ _stats.updateMinMax(value);
+
+ if (_stats.hasSum()) {
+ updateSum(value);
+ }
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ const DecimalColumnStatisticsImpl& decStats =
+ dynamic_cast<const DecimalColumnStatisticsImpl&>(other);
+
+ _stats.merge(decStats._stats);
+
+ _stats.setHasSum(_stats.hasSum() && decStats.hasSum());
+ if (_stats.hasSum()) {
+ updateSum(decStats.getSum());
+ }
+ }
+
+ void reset() override {
+ _stats.reset();
+ setSum(Decimal());
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
+ if (_stats.hasMinimum()) {
+ decStats->set_minimum(TString(_stats.getMinimum().toString()));
+ decStats->set_maximum(TString(_stats.getMaximum().toString()));
+ } else {
+ decStats->clear_minimum();
+ decStats->clear_maximum();
+ }
+ if (_stats.hasSum()) {
+ decStats->set_sum(TString(_stats.getSum().toString()));
+ } else {
+ decStats->clear_sum();
+ }
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Decimal" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasMinimum()){
+ buffer << "Minimum: " << getMinimum().toString() << std::endl;
+ }else{
+ buffer << "Minimum: not defined" << std::endl;
+ }
+
+ if(hasMaximum()){
+ buffer << "Maximum: " << getMaximum().toString() << std::endl;
+ }else{
+ buffer << "Maximum: not defined" << std::endl;
+ }
+
+ if(hasSum()){
+ buffer << "Sum: " << getSum().toString() << std::endl;
+ }else{
+ buffer << "Sum: not defined" << std::endl;
+ }
+
+ return buffer.str();
+ }
+
+ private:
+ void updateSum(Decimal value) {
+ if (_stats.hasSum()) {
+ bool overflow = false;
+ Decimal sum = _stats.getSum();
+ if (sum.scale > value.scale) {
+ value.value = scaleUpInt128ByPowerOfTen(value.value,
+ sum.scale - value.scale,
+ overflow);
+ } else if (sum.scale < value.scale) {
+ sum.value = scaleUpInt128ByPowerOfTen(sum.value,
+ value.scale - sum.scale,
+ overflow);
+ sum.scale = value.scale;
+ }
+
+ if (!overflow) {
+ bool wasPositive = sum.value >= 0;
+ sum.value += value.value;
+ if ((value.value >= 0) == wasPositive) {
+ _stats.setHasSum((sum.value >= 0) == wasPositive);
+ }
+ } else {
+ _stats.setHasSum(false);
+ }
+
+ if (_stats.hasSum()) {
+ _stats.setSum(sum);
+ }
+ }
+ }
+ };
+
+ class DoubleColumnStatisticsImpl: public DoubleColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalDoubleStatistics _stats;
+ public:
+ DoubleColumnStatisticsImpl() { reset(); }
+ DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats);
+ virtual ~DoubleColumnStatisticsImpl() override;
+
+ bool hasMinimum() const override {
+ return _stats.hasMinimum();
+ }
+
+ bool hasMaximum() const override {
+ return _stats.hasMaximum();
+ }
+
+ bool hasSum() const override {
+ return _stats.hasSum();
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ double getMinimum() const override {
+ if(hasMinimum()){
+ return _stats.getMinimum();
+ }else{
+ throw ParseError("Minimum is not defined.");
+ }
+ }
+
+ double getMaximum() const override {
+ if(hasMaximum()){
+ return _stats.getMaximum();
+ }else{
+ throw ParseError("Maximum is not defined.");
+ }
+ }
+
+ void setMinimum(double minimum) {
+ _stats.setHasMinimum(true);
+ _stats.setMinimum(minimum);
+ }
+
+ void setMaximum(double maximum) {
+ _stats.setHasMaximum(true);
+ _stats.setMaximum(maximum);
+ }
+
+ double getSum() const override {
+ if(hasSum()){
+ return _stats.getSum();
+ }else{
+ throw ParseError("Sum is not defined.");
+ }
+ }
+
+ void setSum(double sum) {
+ _stats.setHasSum(true);
+ _stats.setSum(sum);
+ }
+
+ void update(double value) {
+ _stats.updateMinMax(value);
+ _stats.setSum(_stats.getSum() + value);
+ }
+
+ void merge(const MutableColumnStatistics& other) override {
+ const DoubleColumnStatisticsImpl& doubleStats =
+ dynamic_cast<const DoubleColumnStatisticsImpl&>(other);
+ _stats.merge(doubleStats._stats);
+
+ _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum());
+ if (_stats.hasSum()) {
+ _stats.setSum(_stats.getSum() + doubleStats.getSum());
+ }
+ }
+
+ void reset() override {
+ _stats.reset();
+ setSum(0.0);
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics();
+ if (_stats.hasMinimum()) {
+ doubleStats->set_minimum(_stats.getMinimum());
+ doubleStats->set_maximum(_stats.getMaximum());
+ } else {
+ doubleStats->clear_minimum();
+ doubleStats->clear_maximum();
+ }
+ if (_stats.hasSum()) {
+ doubleStats->set_sum(_stats.getSum());
+ } else {
+ doubleStats->clear_sum();
+ }
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Double" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasMinimum()){
+ buffer << "Minimum: " << getMinimum() << std::endl;
+ }else{
+ buffer << "Minimum: not defined" << std::endl;
+ }
+
+ if(hasMaximum()){
+ buffer << "Maximum: " << getMaximum() << std::endl;
+ }else{
+ buffer << "Maximum: not defined" << std::endl;
+ }
+
+ if(hasSum()){
+ buffer << "Sum: " << getSum() << std::endl;
+ }else{
+ buffer << "Sum: not defined" << std::endl;
+ }
+ return buffer.str();
+ }
+ };
+
+ class IntegerColumnStatisticsImpl: public IntegerColumnStatistics,
+ public MutableColumnStatistics {
+ private:
+ InternalIntegerStatistics _stats;
+ public:
+ IntegerColumnStatisticsImpl() { reset(); }
+ IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats);
+ virtual ~IntegerColumnStatisticsImpl() override;
+
+ bool hasMinimum() const override {
+ return _stats.hasMinimum();
+ }
+
+ bool hasMaximum() const override {
+ return _stats.hasMaximum();
+ }
+
+ bool hasSum() const override {
+ return _stats.hasSum();
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
+ int64_t getMinimum() const override {
+ if(hasMinimum()){
+ return _stats.getMinimum();
+ }else{
+ throw ParseError("Minimum is not defined.");
+ }
+ }
+
+ int64_t getMaximum() const override {
+ if(hasMaximum()){
+ return _stats.getMaximum();
+ }else{
+ throw ParseError("Maximum is not defined.");
+ }
+ }
+
+ void setMinimum(int64_t minimum) {
+ _stats.setHasMinimum(true);
+ _stats.setMinimum(minimum);
+ }
+
+ void setMaximum(int64_t maximum) {
+ _stats.setHasMaximum(true);
+ _stats.setMaximum(maximum);
+ }
+
+ int64_t getSum() const override {
+ if(hasSum()){
+ return _stats.getSum();
+ }else{
+ throw ParseError("Sum is not defined.");
+ }
+ }
+
+ void setSum(int64_t sum) {
+ _stats.setHasSum(true);
+ _stats.setSum(sum);
+ }
+
void update(int64_t value, int repetitions) {
_stats.updateMinMax(value);
-
+
if (_stats.hasSum()) {
if (repetitions > 1) {
_stats.setHasSum(multiplyExact(value, repetitions, &value));
@@ -981,498 +981,498 @@ namespace orc {
}
}
- void merge(const MutableColumnStatistics& other) override {
- const IntegerColumnStatisticsImpl& intStats =
- dynamic_cast<const IntegerColumnStatisticsImpl&>(other);
-
- _stats.merge(intStats._stats);
-
- // update sum and check overflow
- _stats.setHasSum(_stats.hasSum() && intStats.hasSum());
- if (_stats.hasSum()) {
+ void merge(const MutableColumnStatistics& other) override {
+ const IntegerColumnStatisticsImpl& intStats =
+ dynamic_cast<const IntegerColumnStatisticsImpl&>(other);
+
+ _stats.merge(intStats._stats);
+
+ // update sum and check overflow
+ _stats.setHasSum(_stats.hasSum() && intStats.hasSum());
+ if (_stats.hasSum()) {
int64_t value;
_stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value));
if (_stats.hasSum()) {
_stats.setSum(value);
- }
- }
- }
-
- void reset() override {
- _stats.reset();
- setSum(0);
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics();
- if (_stats.hasMinimum()) {
- intStats->set_minimum(_stats.getMinimum());
- intStats->set_maximum(_stats.getMaximum());
- } else {
- intStats->clear_minimum();
- intStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- intStats->set_sum(_stats.getSum());
- } else {
- intStats->clear_sum();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Integer" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- buffer << "Minimum: " << getMinimum() << std::endl;
- }else{
- buffer << "Minimum: not defined" << std::endl;
- }
-
- if(hasMaximum()){
- buffer << "Maximum: " << getMaximum() << std::endl;
- }else{
- buffer << "Maximum: not defined" << std::endl;
- }
-
- if(hasSum()){
- buffer << "Sum: " << getSum() << std::endl;
- }else{
- buffer << "Sum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class StringColumnStatisticsImpl: public StringColumnStatistics,
- public MutableColumnStatistics{
- private:
- InternalStringStatistics _stats;
-
- public:
- StringColumnStatisticsImpl() {
- reset();
- }
- StringColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~StringColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- bool hasTotalLength() const override {
- return _stats.hasTotalLength();
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
+ }
+ }
+ }
+
+ void reset() override {
+ _stats.reset();
+ setSum(0);
+ }
+
+ void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+ pbStats.set_hasnull(_stats.hasNull());
+ pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+ proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics();
+ if (_stats.hasMinimum()) {
+ intStats->set_minimum(_stats.getMinimum());
+ intStats->set_maximum(_stats.getMaximum());
+ } else {
+ intStats->clear_minimum();
+ intStats->clear_maximum();
+ }
+ if (_stats.hasSum()) {
+ intStats->set_sum(_stats.getSum());
+ } else {
+ intStats->clear_sum();
+ }
+ }
+
+ std::string toString() const override {
+ std::ostringstream buffer;
+ buffer << "Data type: Integer" << std::endl
+ << "Values: " << getNumberOfValues() << std::endl
+ << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+ if(hasMinimum()){
+ buffer << "Minimum: " << getMinimum() << std::endl;
+ }else{
+ buffer << "Minimum: not defined" << std::endl;
+ }
+
+ if(hasMaximum()){
+ buffer << "Maximum: " << getMaximum() << std::endl;
+ }else{
+ buffer << "Maximum: not defined" << std::endl;
+ }
+
+ if(hasSum()){
+ buffer << "Sum: " << getSum() << std::endl;
+ }else{
+ buffer << "Sum: not defined" << std::endl;
+ }
+ return buffer.str();
+ }
+ };
+
+ class StringColumnStatisticsImpl: public StringColumnStatistics,
+ public MutableColumnStatistics{
+ private:
+ InternalStringStatistics _stats;
+
+ public:
+ StringColumnStatisticsImpl() {
+ reset();
+ }
+ StringColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+ const StatContext& statContext);
+ virtual ~StringColumnStatisticsImpl() override;
+
+ bool hasMinimum() const override {
+ return _stats.hasMinimum();
+ }
+
+ bool hasMaximum() const override {
+ return _stats.hasMaximum();
+ }
+
+ bool hasTotalLength() const override {
+ return _stats.hasTotalLength();
+ }
+
+ void increase(uint64_t count) override {
+ _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+ }
+
+ uint64_t getNumberOfValues() const override {
+ return _stats.getNumberOfValues();
+ }
+
+ void setNumberOfValues(uint64_t value) override {
+ _stats.setNumberOfValues(value);
+ }
+
+ bool hasNull() const override {
+ return _stats.hasNull();
+ }
+
+ void setHasNull(bool hasNull) override {
+ _stats.setHasNull(hasNull);
+ }
+
const std::string & getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
+ if(hasMinimum()){
+ return _stats.getMinimum();
+ }else{
+ throw ParseError("Minimum is not defined.");
+ }
+ }
+
const std::string & getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(std::string minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(std::string maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- uint64_t getTotalLength() const override {
- if(hasTotalLength()){
- return _stats.getTotalLength();
- }else{
- throw ParseError("Total length is not defined.");
- }
- }
-
- void setTotalLength(uint64_t length) {
- _stats.setHasTotalLength(true);
- _stats.setTotalLength(length);
- }
-
- void update(const char* value, size_t length) {
- if (value != nullptr) {
- if (!_stats.hasMinimum()) {
- std::string tempStr(value, value + length);
- setMinimum(tempStr);
- setMaximum(tempStr);
- } else {
- // update min
- int minCmp = strncmp(_stats.getMinimum().c_str(),
- value,
- std::min(_stats.getMinimum().length(), length));
- if (minCmp > 0 ||
- (minCmp == 0 && length < _stats.getMinimum().length())) {
- setMinimum(std::string(value, value + length));
- }
-
- // update max
- int maxCmp = strncmp(_stats.getMaximum().c_str(),
- value,
- std::min(_stats.getMaximum().length(), length));
- if (maxCmp < 0 ||
- (maxCmp == 0 && length > _stats.getMaximum().length())) {
- setMaximum(std::string(value, value + length));
- }
- }
- }
-
- _stats.setTotalLength(_stats.getTotalLength() + length);
- }
-
- void update(std::string value) {
- update(value.c_str(), value.length());
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const StringColumnStatisticsImpl& strStats =
- dynamic_cast<const StringColumnStatisticsImpl&>(other);
- _stats.merge(strStats._stats);
- }
-
- void reset() override {
- _stats.reset();
- setTotalLength(0);
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::StringStatistics* strStats = pbStats.mutable_stringstatistics();
- if (_stats.hasMinimum()) {
- strStats->set_minimum(TString(_stats.getMinimum()));
- strStats->set_maximum(TString(_stats.getMaximum()));
- } else {
- strStats->clear_minimum();
- strStats->clear_maximum();
- }
- if (_stats.hasTotalLength()) {
- strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
- } else {
- strStats->clear_sum();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: String" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- buffer << "Minimum: " << getMinimum() << std::endl;
- }else{
- buffer << "Minimum is not defined" << std::endl;
- }
-
- if(hasMaximum()){
- buffer << "Maximum: " << getMaximum() << std::endl;
- }else{
- buffer << "Maximum is not defined" << std::endl;
- }
-
- if(hasTotalLength()){
- buffer << "Total length: " << getTotalLength() << std::endl;
- }else{
- buffer << "Total length is not defined" << std::endl;
- }
- return buffer.str();
- }
- };
-
- class TimestampColumnStatisticsImpl: public TimestampColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalIntegerStatistics _stats;
- bool _hasLowerBound;
- bool _hasUpperBound;
- int64_t _lowerBound;
- int64_t _upperBound;
-
- public:
- TimestampColumnStatisticsImpl() { reset(); }
- TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~TimestampColumnStatisticsImpl() override;
-
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
-
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
-
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
-
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
-
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
-
- bool hasNull() const override {
- return _stats.hasNull();
- }
-
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
-
- int64_t getMinimum() const override {
- if(hasMinimum()){
- return _stats.getMinimum();
- }else{
- throw ParseError("Minimum is not defined.");
- }
- }
-
- int64_t getMaximum() const override {
- if(hasMaximum()){
- return _stats.getMaximum();
- }else{
- throw ParseError("Maximum is not defined.");
- }
- }
-
- void setMinimum(int64_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
-
- void setMaximum(int64_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
-
- void update(int64_t value) {
- _stats.updateMinMax(value);
- }
-
- void merge(const MutableColumnStatistics& other) override {
- const TimestampColumnStatisticsImpl& tsStats =
- dynamic_cast<const TimestampColumnStatisticsImpl&>(other);
- _stats.merge(tsStats._stats);
- }
-
- void reset() override {
- _stats.reset();
- }
-
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
-
- proto::TimestampStatistics* tsStats =
- pbStats.mutable_timestampstatistics();
- if (_stats.hasMinimum()) {
- tsStats->set_minimumutc(_stats.getMinimum());
- tsStats->set_maximumutc(_stats.getMaximum());
- } else {
- tsStats->clear_minimumutc();
- tsStats->clear_maximumutc();
- }
- }
-
- std::string toString() const override {
- std::ostringstream buffer;
- struct tm tmValue;
- char timeBuffer[20];
- time_t secs = 0;
-
- buffer << "Data type: Timestamp" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if(hasMinimum()){
- secs = static_cast<time_t>(getMinimum() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "Minimum: " << timeBuffer << "."
- << (getMinimum() % 1000) << std::endl;
- }else{
- buffer << "Minimum is not defined" << std::endl;
- }
-
- if(hasLowerBound()){
- secs = static_cast<time_t>(getLowerBound() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "LowerBound: " << timeBuffer << "."
- << (getLowerBound() % 1000) << std::endl;
- }else{
- buffer << "LowerBound is not defined" << std::endl;
- }
-
- if(hasMaximum()){
- secs = static_cast<time_t>(getMaximum()/1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "Maximum: " << timeBuffer << "."
- << (getMaximum() % 1000) << std::endl;
- }else{
- buffer << "Maximum is not defined" << std::endl;
- }
-
- if(hasUpperBound()){
- secs = static_cast<time_t>(getUpperBound() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "UpperBound: " << timeBuffer << "."
- << (getUpperBound() % 1000) << std::endl;
- }else{
- buffer << "UpperBound is not defined" << std::endl;
- }
-
- return buffer.str();
- }
-
- bool hasLowerBound() const override {
- return _hasLowerBound;
- }
-
- bool hasUpperBound() const override {
- return _hasUpperBound;
- }
-
- int64_t getLowerBound() const override {
- if(hasLowerBound()){
- return _lowerBound;
- }else{
- throw ParseError("LowerBound is not defined.");
- }
- }
-
- int64_t getUpperBound() const override {
- if(hasUpperBound()){
- return _upperBound;
- }else{
- throw ParseError("UpperBound is not defined.");
- }
- }
- };
-
- ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
- const StatContext& statContext);
-
- class StatisticsImpl: public Statistics {
- private:
- std::vector<ColumnStatistics*> colStats;
-
- // DELIBERATELY NOT IMPLEMENTED
- StatisticsImpl(const StatisticsImpl&);
- StatisticsImpl& operator=(const StatisticsImpl&);
-
- public:
- StatisticsImpl(const proto::StripeStatistics& stripeStats,
- const StatContext& statContext);
-
- StatisticsImpl(const proto::Footer& footer, const StatContext& statContext);
-
- virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId
- ) const override {
- return colStats[columnId];
- }
-
- virtual ~StatisticsImpl() override;
-
- uint32_t getNumberOfColumns() const override {
- return static_cast<uint32_t>(colStats.size());
- }
- };
-
- class StripeStatisticsImpl: public StripeStatistics {
- private:
- std::unique_ptr<StatisticsImpl> columnStats;
- std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > >
- rowIndexStats;
-
- // DELIBERATELY NOT IMPLEMENTED
- StripeStatisticsImpl(const StripeStatisticsImpl&);
- StripeStatisticsImpl& operator=(const StripeStatisticsImpl&);
-
- public:
- StripeStatisticsImpl(
- const proto::StripeStatistics& stripeStats,
- std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
- const StatContext& statContext);
-
- virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId
- ) const override {
- return columnStats->getColumnStatistics(columnId);
- }
-
- uint32_t getNumberOfColumns() const override {
- return columnStats->getNumberOfColumns();
- }
-
- virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
- uint32_t rowIndex
- ) const override {
- // check id indices are valid
- return rowIndexStats[columnId][rowIndex].get();
- }
-
- virtual ~StripeStatisticsImpl() override;
-
- uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override {
- return static_cast<uint32_t>(rowIndexStats[columnId].size());
- }
- };
-
- /**
- * Create ColumnStatistics for writers
- * @param type of column
- * @return MutableColumnStatistics instances
- */
- std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
- const Type& type);
-
-}// namespace
-
-#endif
+ if(hasMaximum()){
+ return _stats.getMaximum();
+ }else{
+ throw ParseError("Maximum is not defined.");
+ }
+ }
+
+ // Store `minimum` in _stats as the column minimum and flag the minimum as
+ // present. The flag is raised before the value is written.
+ void setMinimum(std::string minimum) {
+ _stats.setHasMinimum(true);
+ _stats.setMinimum(minimum);
+ }
+
+ // Store `maximum` in _stats as the column maximum and flag the maximum as
+ // present. The flag is raised before the value is written.
+ void setMaximum(std::string maximum) {
+ _stats.setHasMaximum(true);
+ _stats.setMaximum(maximum);
+ }
+
+    /**
+     * @return the total length held by _stats
+     * @throws ParseError when hasTotalLength() reports that no total length
+     *         is recorded
+     */
+    uint64_t getTotalLength() const override {
+      if (!hasTotalLength()) {
+        throw ParseError("Total length is not defined.");
+      }
+      return _stats.getTotalLength();
+    }
+
+ // Store `length` in _stats as the total length and flag the total length as
+ // present.
+ void setTotalLength(uint64_t length) {
+ _stats.setHasTotalLength(true);
+ _stats.setTotalLength(length);
+ }
+
+    /**
+     * Fold one value into the running minimum, maximum and total length.
+     *
+     * Values are ordered bytewise over their common prefix; when the prefixes
+     * are equal the shorter value sorts first. The comparison uses memcmp so
+     * that bytes after an embedded '\0' still take part in the ordering
+     * (strncmp would stop at the first '\0' and treat the rest as equal).
+     *
+     * @param value  pointer to the bytes of the value; when nullptr only the
+     *               total length is touched
+     * @param length number of bytes at `value`; always added to the total
+     *               length, even when `value` is nullptr
+     */
+    void update(const char* value, size_t length) {
+      if (value != nullptr) {
+        if (!_stats.hasMinimum()) {
+          // first value seen: it is both the minimum and the maximum
+          std::string tempStr(value, value + length);
+          setMinimum(tempStr);
+          setMaximum(tempStr);
+        } else {
+          // update min
+          // const auto& binds safely whether getMinimum() returns a value or
+          // a reference; it is not used after setMinimum().
+          const auto& curMin = _stats.getMinimum();
+          const size_t curMinLen = curMin.length();
+          int minCmp = memcmp(curMin.c_str(),
+                              value,
+                              std::min(curMinLen, length));
+          if (minCmp > 0 || (minCmp == 0 && length < curMinLen)) {
+            setMinimum(std::string(value, value + length));
+          }
+
+          // update max
+          const auto& curMax = _stats.getMaximum();
+          const size_t curMaxLen = curMax.length();
+          int maxCmp = memcmp(curMax.c_str(),
+                              value,
+                              std::min(curMaxLen, length));
+          if (maxCmp < 0 || (maxCmp == 0 && length > curMaxLen)) {
+            setMaximum(std::string(value, value + length));
+          }
+        }
+      }
+
+      _stats.setTotalLength(_stats.getTotalLength() + length);
+    }
+
+    // Convenience overload: hands the string's character buffer and length
+    // to update(const char*, size_t).
+    void update(std::string value) {
+      const char* bytes = value.c_str();
+      const size_t byteCount = value.length();
+      update(bytes, byteCount);
+    }
+
+    /**
+     * Merge the statistics held by `other` into this object's _stats.
+     * @param other must refer to a StringColumnStatisticsImpl; the reference
+     *              dynamic_cast throws std::bad_cast for any other type
+     */
+    void merge(const MutableColumnStatistics& other) override {
+      _stats.merge(
+          dynamic_cast<const StringColumnStatisticsImpl&>(other)._stats);
+    }
+
+ // Reset the wrapped _stats, then call setTotalLength(0), which stores a
+ // total length of zero and flags the total length as present.
+ void reset() override {
+ _stats.reset();
+ setTotalLength(0);
+ }
+
+    /**
+     * Copy these statistics into the protobuf message `pbStats`.
+     *
+     * The null flag and value count are always written. Minimum and maximum
+     * are written together when _stats reports a minimum and cleared together
+     * otherwise; the sum is written only when a total length is present.
+     */
+    void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+      pbStats.set_hasnull(_stats.hasNull());
+      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+      proto::StringStatistics* target = pbStats.mutable_stringstatistics();
+      if (!_stats.hasMinimum()) {
+        target->clear_minimum();
+        target->clear_maximum();
+      } else {
+        target->set_minimum(TString(_stats.getMinimum()));
+        target->set_maximum(TString(_stats.getMaximum()));
+      }
+
+      if (!_stats.hasTotalLength()) {
+        target->clear_sum();
+      } else {
+        target->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
+      }
+    }
+
+    /**
+     * Render the statistics as text, one item per line: data type, value
+     * count, null flag, then minimum, maximum and total length. An item that
+     * is not present is reported as "... is not defined".
+     */
+    std::string toString() const override {
+      std::ostringstream out;
+      out << "Data type: String" << std::endl;
+      out << "Values: " << getNumberOfValues() << std::endl;
+      out << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+
+      if (!hasMinimum()) {
+        out << "Minimum is not defined" << std::endl;
+      } else {
+        out << "Minimum: " << getMinimum() << std::endl;
+      }
+
+      if (!hasMaximum()) {
+        out << "Maximum is not defined" << std::endl;
+      } else {
+        out << "Maximum: " << getMaximum() << std::endl;
+      }
+
+      if (!hasTotalLength()) {
+        out << "Total length is not defined" << std::endl;
+      } else {
+        out << "Total length: " << getTotalLength() << std::endl;
+      }
+      return out.str();
+    }
+ };
+
+  /**
+   * Timestamp column statistics, usable both through the read-side
+   * TimestampColumnStatistics interface and the writer-side
+   * MutableColumnStatistics interface.
+   *
+   * Minimum, maximum, value count and null flag live in _stats. The lower
+   * and upper bounds are separate members that no method in this class body
+   * assigns; presumably the out-of-line constructor taking
+   * proto::ColumnStatistics fills them - confirm against its definition.
+   */
+  class TimestampColumnStatisticsImpl: public TimestampColumnStatistics,
+                                       public MutableColumnStatistics {
+  private:
+    InternalIntegerStatistics _stats;
+    // Default member initializers: the default constructor only calls
+    // reset(), which touches _stats alone. Without these defaults
+    // hasLowerBound()/hasUpperBound() and toString() would read
+    // indeterminate values on a default-constructed object.
+    bool _hasLowerBound = false;
+    bool _hasUpperBound = false;
+    int64_t _lowerBound = 0;
+    int64_t _upperBound = 0;
+
+    // Appends "<label>: YYYY-MM-DD HH:MM:SS.<rest>" and a line end to `out`.
+    // `millis` is divided by 1000 to obtain the seconds passed to gmtime_r;
+    // the remainder is streamed as a plain integer after the dot.
+    // NOTE(review): the remainder is not zero-padded and is negative for
+    // negative inputs; left as is because callers may depend on the text.
+    static void appendTime(std::ostringstream& out, const char* label,
+                           int64_t millis) {
+      struct tm tmValue;
+      char timeBuffer[20];
+      time_t secs = static_cast<time_t>(millis / 1000);
+      gmtime_r(&secs, &tmValue);
+      strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
+      out << label << ": " << timeBuffer << "."
+          << (millis % 1000) << std::endl;
+    }
+
+  public:
+    // Starts from reset() statistics with no lower/upper bound.
+    TimestampColumnStatisticsImpl() { reset(); }
+    TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                                  const StatContext& statContext);
+    virtual ~TimestampColumnStatisticsImpl() override;
+
+    bool hasMinimum() const override {
+      return _stats.hasMinimum();
+    }
+
+    bool hasMaximum() const override {
+      return _stats.hasMaximum();
+    }
+
+    uint64_t getNumberOfValues() const override {
+      return _stats.getNumberOfValues();
+    }
+
+    void setNumberOfValues(uint64_t value) override {
+      _stats.setNumberOfValues(value);
+    }
+
+    // Adds `count` to the current number of values.
+    void increase(uint64_t count) override {
+      _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
+    }
+
+    bool hasNull() const override {
+      return _stats.hasNull();
+    }
+
+    void setHasNull(bool hasNull) override {
+      _stats.setHasNull(hasNull);
+    }
+
+    /**
+     * @return the minimum held by _stats
+     * @throws ParseError when no minimum is present
+     */
+    int64_t getMinimum() const override {
+      if(hasMinimum()){
+        return _stats.getMinimum();
+      }else{
+        throw ParseError("Minimum is not defined.");
+      }
+    }
+
+    /**
+     * @return the maximum held by _stats
+     * @throws ParseError when no maximum is present
+     */
+    int64_t getMaximum() const override {
+      if(hasMaximum()){
+        return _stats.getMaximum();
+      }else{
+        throw ParseError("Maximum is not defined.");
+      }
+    }
+
+    // Stores `minimum` and flags the minimum as present.
+    void setMinimum(int64_t minimum) {
+      _stats.setHasMinimum(true);
+      _stats.setMinimum(minimum);
+    }
+
+    // Stores `maximum` and flags the maximum as present.
+    void setMaximum(int64_t maximum) {
+      _stats.setHasMaximum(true);
+      _stats.setMaximum(maximum);
+    }
+
+    // Folds one value into the minimum/maximum tracked by _stats.
+    void update(int64_t value) {
+      _stats.updateMinMax(value);
+    }
+
+    /**
+     * Merge the statistics held by `other` into this object's _stats.
+     * @param other must refer to a TimestampColumnStatisticsImpl; the
+     *              reference dynamic_cast throws std::bad_cast otherwise
+     */
+    void merge(const MutableColumnStatistics& other) override {
+      const TimestampColumnStatisticsImpl& tsStats =
+        dynamic_cast<const TimestampColumnStatisticsImpl&>(other);
+      _stats.merge(tsStats._stats);
+    }
+
+    // Resets _stats only; the lower/upper bound members are left unchanged.
+    void reset() override {
+      _stats.reset();
+    }
+
+    /**
+     * Copy these statistics into the protobuf message `pbStats`. The null
+     * flag and value count are always written; minimumutc and maximumutc are
+     * written together when _stats reports a minimum and cleared otherwise.
+     */
+    void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+      pbStats.set_hasnull(_stats.hasNull());
+      pbStats.set_numberofvalues(_stats.getNumberOfValues());
+
+      proto::TimestampStatistics* tsStats =
+        pbStats.mutable_timestampstatistics();
+      if (_stats.hasMinimum()) {
+        tsStats->set_minimumutc(_stats.getMinimum());
+        tsStats->set_maximumutc(_stats.getMaximum());
+      } else {
+        tsStats->clear_minimumutc();
+        tsStats->clear_maximumutc();
+      }
+    }
+
+    /**
+     * Render the statistics as text: data type, value count, null flag, then
+     * Minimum, LowerBound, Maximum and UpperBound in that order. Items that
+     * are not present are reported as "... is not defined".
+     */
+    std::string toString() const override {
+      std::ostringstream buffer;
+
+      buffer << "Data type: Timestamp" << std::endl
+             << "Values: " << getNumberOfValues() << std::endl
+             << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
+      if(hasMinimum()){
+        appendTime(buffer, "Minimum", getMinimum());
+      }else{
+        buffer << "Minimum is not defined" << std::endl;
+      }
+
+      if(hasLowerBound()){
+        appendTime(buffer, "LowerBound", getLowerBound());
+      }else{
+        buffer << "LowerBound is not defined" << std::endl;
+      }
+
+      if(hasMaximum()){
+        appendTime(buffer, "Maximum", getMaximum());
+      }else{
+        buffer << "Maximum is not defined" << std::endl;
+      }
+
+      if(hasUpperBound()){
+        appendTime(buffer, "UpperBound", getUpperBound());
+      }else{
+        buffer << "UpperBound is not defined" << std::endl;
+      }
+
+      return buffer.str();
+    }
+
+    bool hasLowerBound() const override {
+      return _hasLowerBound;
+    }
+
+    bool hasUpperBound() const override {
+      return _hasUpperBound;
+    }
+
+    /**
+     * @return the stored lower bound
+     * @throws ParseError when no lower bound is present
+     */
+    int64_t getLowerBound() const override {
+      if(hasLowerBound()){
+        return _lowerBound;
+      }else{
+        throw ParseError("LowerBound is not defined.");
+      }
+    }
+
+    /**
+     * @return the stored upper bound
+     * @throws ParseError when no upper bound is present
+     */
+    int64_t getUpperBound() const override {
+      if(hasUpperBound()){
+        return _upperBound;
+      }else{
+        throw ParseError("UpperBound is not defined.");
+      }
+    }
+  };
+
+ // Declaration only; the definition is not in this header. Judging by the
+ // signature it produces a ColumnStatistics object from the protobuf message
+ // `s` using `statContext`.
+ // NOTE(review): returns a raw pointer - presumably the caller takes
+ // ownership; confirm against the definition.
+ ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
+ const StatContext& statContext);
+
+  /**
+   * Statistics implementation backed by one ColumnStatistics pointer per
+   * column. The constructors and the destructor are defined out of line.
+   * NOTE(review): colStats holds raw pointers - presumably they are owned
+   * and released by the destructor; confirm against its definition.
+   */
+  class StatisticsImpl: public Statistics {
+  private:
+    std::vector<ColumnStatistics*> colStats;
+
+    // not copyable
+    StatisticsImpl(const StatisticsImpl&) = delete;
+    StatisticsImpl& operator=(const StatisticsImpl&) = delete;
+
+  public:
+    StatisticsImpl(const proto::StripeStatistics& stripeStats,
+                   const StatContext& statContext);
+
+    StatisticsImpl(const proto::Footer& footer, const StatContext& statContext);
+
+    /**
+     * @param columnId index into the per-column statistics
+     * @return the statistics stored for that column
+     * @throws std::out_of_range when columnId >= getNumberOfColumns()
+     */
+    const ColumnStatistics* getColumnStatistics(uint32_t columnId
+                                                ) const override {
+      // at() instead of operator[] so a bad id is reported, not undefined
+      return colStats.at(columnId);
+    }
+
+    virtual ~StatisticsImpl() override;
+
+    // Number of entries in colStats.
+    uint32_t getNumberOfColumns() const override {
+      return static_cast<uint32_t>(colStats.size());
+    }
+  };
+
+  /**
+   * StripeStatistics implementation: per-column statistics are delegated to
+   * an owned StatisticsImpl, and row-index statistics are kept in
+   * rowIndexStats, indexed first by column id and then by row index.
+   * The constructor and the destructor are defined out of line.
+   */
+  class StripeStatisticsImpl: public StripeStatistics {
+  private:
+    std::unique_ptr<StatisticsImpl> columnStats;
+    std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > >
+      rowIndexStats;
+
+    // not copyable
+    StripeStatisticsImpl(const StripeStatisticsImpl&) = delete;
+    StripeStatisticsImpl& operator=(const StripeStatisticsImpl&) = delete;
+
+  public:
+    StripeStatisticsImpl(
+      const proto::StripeStatistics& stripeStats,
+      std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
+      const StatContext& statContext);
+
+    // Forwards to the wrapped StatisticsImpl.
+    const ColumnStatistics* getColumnStatistics(uint32_t columnId
+                                                ) const override {
+      return columnStats->getColumnStatistics(columnId);
+    }
+
+    // Forwards to the wrapped StatisticsImpl.
+    uint32_t getNumberOfColumns() const override {
+      return columnStats->getNumberOfColumns();
+    }
+
+    /**
+     * @param columnId index of the column in rowIndexStats
+     * @param rowIndex index of the entry within that column
+     * @return the stored row-index statistics (may be whatever pointer the
+     *         constructor stored, including null)
+     * @throws std::out_of_range when either index is out of bounds
+     */
+    const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
+                                                  uint32_t rowIndex
+                                                  ) const override {
+      // at() performs the validity check on both indices
+      return rowIndexStats.at(columnId).at(rowIndex).get();
+    }
+
+    virtual ~StripeStatisticsImpl() override;
+
+    /**
+     * @param columnId index of the column in rowIndexStats
+     * @return number of row-index entries stored for that column
+     * @throws std::out_of_range when columnId is out of bounds
+     */
+    uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override {
+      return static_cast<uint32_t>(rowIndexStats.at(columnId).size());
+    }
+  };
+
+ /**
+ * Create the column statistics object a writer uses for one column.
+ * Declaration only; the definition is not in this header.
+ * @param type the type of the column
+ * @return a MutableColumnStatistics instance, owned by the caller through
+ * the returned std::unique_ptr
+ */
+ std::unique_ptr<MutableColumnStatistics> createColumnStatistics(
+ const Type& type);
+
+}// namespace
+
+#endif