diff options
author | Aidar Samerkhanov <aidarsamer@yandex-team.ru> | 2022-02-09 18:18:45 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 15:58:17 +0300 |
commit | e65c50047c24f91dcd6454edcd9b0e4bf9a2ae2a (patch) | |
tree | 379a6851244b5c9ff9c3d7994c26126b9b712942 /library/cpp/histogram | |
parent | 542b4cb3a3bbb51b5a1cdedd117ee52a1e8f2032 (diff) | |
download | ydb-e65c50047c24f91dcd6454edcd9b0e4bf9a2ae2a.tar.gz |
KIKIMR-13365. Add workload commands to ydb cli.
KIKIMR-13365. Add workload command to YDB cli.
ref:d1b633b524f135ff2d0f23ee7534c1f67268be75
Diffstat (limited to 'library/cpp/histogram')
-rw-r--r-- | library/cpp/histogram/hdr/histogram.cpp | 155 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/histogram.h | 303 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/histogram_iter.cpp | 146 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/histogram_iter.h | 231 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/histogram_iter_ut.cpp | 210 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/histogram_ut.cpp | 178 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/ut/ya.make | 13 | ||||
-rw-r--r-- | library/cpp/histogram/hdr/ya.make | 17 |
8 files changed, 1253 insertions, 0 deletions
diff --git a/library/cpp/histogram/hdr/histogram.cpp b/library/cpp/histogram/hdr/histogram.cpp new file mode 100644 index 0000000000..a213d5d8fd --- /dev/null +++ b/library/cpp/histogram/hdr/histogram.cpp @@ -0,0 +1,155 @@ +#include "histogram.h" + +#include <util/generic/cast.h> +#include <util/generic/yexception.h> + +#include <contrib/libs/hdr_histogram/src/hdr_histogram.h> + +namespace NHdr { + namespace { + struct hdr_histogram* CreateHistogram( + i64 lowestDiscernibleValue, i64 highestTrackableValue, + i32 numberOfSignificantValueDigits, IAllocator* allocator) { + struct hdr_histogram_bucket_config cfg; + + int r = hdr_calculate_bucket_config( + lowestDiscernibleValue, highestTrackableValue, + numberOfSignificantValueDigits, &cfg); + if (r) { + ythrow yexception() << "illegal arguments values"; + } + + size_t histogramSize = sizeof(struct hdr_histogram) + + cfg.counts_len * sizeof(i64); + + IAllocator::TBlock mem = allocator->Allocate(histogramSize); + struct hdr_histogram* histogram = + reinterpret_cast<struct hdr_histogram*>(mem.Data); + + // memset will ensure that all of the function pointers are null + memset(histogram, 0, histogramSize); + + hdr_init_preallocated(histogram, &cfg); + return histogram; + } + + } + + THistogram::THistogram(i64 lowestDiscernibleValue, i64 highestTrackableValue, + i32 numberOfSignificantValueDigits, IAllocator* allocator) + : Data_(CreateHistogram( + lowestDiscernibleValue, highestTrackableValue, + numberOfSignificantValueDigits, allocator)) + , Allocator_(allocator) + { + } + + THistogram::~THistogram() { + if (Data_) { + size_t size = GetMemorySize(); + Allocator_->Release({Data_.Release(), size}); + } + } + + // Histogram structure querying support ----------------------------------- + + i64 THistogram::GetLowestDiscernibleValue() const { + return Data_->lowest_trackable_value; + } + + i64 THistogram::GetHighestTrackableValue() const { + return Data_->highest_trackable_value; + } + + i32 THistogram::GetNumberOfSignificantValueDigits() const { + return Data_->significant_figures; + } + + size_t THistogram::GetMemorySize() const { + return hdr_get_memory_size(Data_.Get()); + } + + i32 THistogram::GetCountsLen() const { + return Data_->counts_len; + } + + i64 THistogram::GetTotalCount() const { + return Data_->total_count; + } + + // Value recording support ------------------------------------------------ + + bool THistogram::RecordValue(i64 value) { + return hdr_record_value(Data_.Get(), value); + } + + bool THistogram::RecordValues(i64 value, i64 count) { + return hdr_record_values(Data_.Get(), value, count); + } + + bool THistogram::RecordValueWithExpectedInterval(i64 value, i64 expectedInterval) { + return hdr_record_corrected_value(Data_.Get(), value, expectedInterval); + } + + bool THistogram::RecordValuesWithExpectedInterval( + i64 value, i64 count, i64 expectedInterval) { + return hdr_record_corrected_values( + Data_.Get(), value, count, expectedInterval); + } + + i64 THistogram::Add(const THistogram& rhs) { + return hdr_add(Data_.Get(), rhs.Data_.Get()); + } + + i64 THistogram::AddWithExpectedInterval(const THistogram& rhs, i64 expectedInterval) { + return hdr_add_while_correcting_for_coordinated_omission( + Data_.Get(), rhs.Data_.Get(), expectedInterval); + } + + // Histogram Data access support ------------------------------------------ + + i64 THistogram::GetMin() const { + return hdr_min(Data_.Get()); + } + + i64 THistogram::GetMax() const { + return hdr_max(Data_.Get()); + } + + double THistogram::GetMean() const { + return hdr_mean(Data_.Get()); + } + + double THistogram::GetStdDeviation() const { + return hdr_stddev(Data_.Get()); + } + + i64 THistogram::GetValueAtPercentile(double percentile) const { + return hdr_value_at_percentile(Data_.Get(), percentile); + } + + i64 THistogram::GetCountAtValue(i64 value) const { + return hdr_count_at_value(Data_.Get(), value); + } + + bool THistogram::ValuesAreEqual(i64 v1, i64 v2) const { + return hdr_values_are_equivalent(Data_.Get(), v1, v2); + } + + i64 THistogram::GetLowestEquivalentValue(i64 value) const { + return hdr_lowest_equivalent_value(Data_.Get(), value); + } + + i64 THistogram::GetHighestEquivalentValue(i64 value) const { + return hdr_next_non_equivalent_value(Data_.Get(), value) - 1; + } + + i64 THistogram::GetMedianEquivalentValue(i64 value) const { + return hdr_median_equivalent_value(Data_.Get(), value); + } + + void THistogram::Reset() { + hdr_reset(Data_.Get()); + } + +} diff --git a/library/cpp/histogram/hdr/histogram.h b/library/cpp/histogram/hdr/histogram.h new file mode 100644 index 0000000000..5f1cebbd9f --- /dev/null +++ b/library/cpp/histogram/hdr/histogram.h @@ -0,0 +1,303 @@ +#pragma once + +#include <util/generic/ptr.h> +#include <util/generic/noncopyable.h> +#include <util/memory/alloc.h> + +struct hdr_histogram; + +namespace NHdr { + /** + * A High Dynamic Range (HDR) Histogram + * + * THdrHistogram supports the recording and analyzing sampled data value counts + * across a configurable integer value range with configurable value precision + * within the range. Value precision is expressed as the number of significant + * digits in the value recording, and provides control over value quantization + * behavior across the value range and the subsequent value resolution at any + * given level. + */ + class THistogram: public TMoveOnly { + public: + /** + * Construct a histogram given the Highest value to be tracked and a number + * of significant decimal digits. The histogram will be constructed to + * implicitly track (distinguish from 0) values as low as 1. Default + * allocator will be used to allocate underlying memory. + * + * @param highestTrackableValue The highest value to be tracked by the + * histogram. Must be a positive integer that is literal >= 2. + * + * @param numberOfSignificantValueDigits Specifies the precision to use. + * This is the number of significant decimal digits to which the + * histogram will maintain value resolution and separation. Must be + * a non-negative integer between 0 and 5. + */ + THistogram(i64 highestTrackableValue, i32 numberOfSignificantValueDigits) + : THistogram(1, highestTrackableValue, numberOfSignificantValueDigits) + { + } + + /** + * Construct a histogram given the Lowest and Highest values to be tracked + * and a number of significant decimal digits. Providing a + * lowestDiscernibleValue is useful in situations where the units used for + * the histogram's values are much smaller that the minimal accuracy + * required. E.g. when tracking time values stated in nanosecond units, + * where the minimal accuracy required is a microsecond, the proper value + * for lowestDiscernibleValue would be 1000. + * + * @param lowestDiscernibleValue The lowest value that can be discerned + * (distinguished from 0) by the histogram. Must be a positive + * integer that is >= 1. May be internally rounded down to nearest + * power of 2. + * + * @param highestTrackableValue The highest value to be tracked by the + * histogram. Must be a positive integer that is + * >= (2 * lowestDiscernibleValue). + * + * @param numberOfSignificantValueDigits Specifies the precision to use. + * This is the number of significant decimal digits to which the + * histogram will maintain value resolution and separation. Must be + * a non-negative integer between 0 and 5. + * + * @param allocator Specifies allocator which will be used to allocate + * memory for histogram. + */ + THistogram(i64 lowestDiscernibleValue, i64 highestTrackableValue, + i32 numberOfSignificantValueDigits, + IAllocator* allocator = TDefaultAllocator::Instance()); + + ~THistogram(); + + // Histogram structure querying support ----------------------------------- + + /** + * @return The configured lowestDiscernibleValue + */ + i64 GetLowestDiscernibleValue() const; + + /** + * @return The configured highestTrackableValue + */ + i64 GetHighestTrackableValue() const; + + /** + * @return The configured numberOfSignificantValueDigits + */ + i32 GetNumberOfSignificantValueDigits() const; + + /** + * @return The size of allocated memory for histogram + */ + size_t GetMemorySize() const; + + /** + * @return The number of created counters + */ + i32 GetCountsLen() const; + + /** + * @return The total count of all recorded values in the histogram + */ + i64 GetTotalCount() const; + + // Value recording support ------------------------------------------------ + + /** + * Records a value in the histogram, will round this value of to a + * precision at or better than the NumberOfSignificantValueDigits specified + * at construction time. + * + * @param value Value to add to the histogram + * @return false if the value is larger than the HighestTrackableValue + * and can't be recorded, true otherwise. + */ + bool RecordValue(i64 value); + + /** + * Records count values in the histogram, will round this value of to a + * precision at or better than the NumberOfSignificantValueDigits specified + * at construction time. + * + * @param value Value to add to the histogram + * @param count Number of values to add to the histogram + * @return false if the value is larger than the HighestTrackableValue + * and can't be recorded, true otherwise. + */ + bool RecordValues(i64 value, i64 count); + + /** + * Records a value in the histogram and backfill based on an expected + * interval. Value will be rounded this to a precision at or better + * than the NumberOfSignificantValueDigits specified at contruction time. + * This is specifically used for recording latency. If the value is larger + * than the expectedInterval then the latency recording system has + * experienced co-ordinated omission. This method fills in the values that + * would have occured had the client providing the load not been blocked. + * + * @param value Value to add to the histogram + * @param expectedInterval The delay between recording values + * @return false if the value is larger than the HighestTrackableValue + * and can't be recorded, true otherwise. + */ + bool RecordValueWithExpectedInterval(i64 value, i64 expectedInterval); + + /** + * Record a value in the histogram count times. Applies the same correcting + * logic as {@link THistogram::RecordValueWithExpectedInterval}. + * + * @param value Value to add to the histogram + * @param count Number of values to add to the histogram + * @param expectedInterval The delay between recording values. + * @return false if the value is larger than the HighestTrackableValue + * and can't be recorded, true otherwise. + */ + bool RecordValuesWithExpectedInterval( + i64 value, i64 count, i64 expectedInterval); + + /** + * Adds all of the values from rhs to this histogram. Will return the + * number of values that are dropped when copying. Values will be dropped + * if they around outside of [LowestDiscernibleValue, GetHighestTrackableValue]. + * + * @param rhs Histogram to copy values from. + * @return The number of values dropped when copying. + */ + i64 Add(const THistogram& rhs); + + /** + * Adds all of the values from rhs to this histogram. Will return the + * number of values that are dropped when copying. Values will be dropped + * if they around outside of [LowestDiscernibleValue, GetHighestTrackableValue]. + * Applies the same correcting logic as + * {@link THistogram::RecordValueWithExpectedInterval}. + * + * @param rhs Histogram to copy values from. + * @return The number of values dropped when copying. + */ + i64 AddWithExpectedInterval(const THistogram& rhs, i64 expectedInterval); + + // Histogram Data access support ------------------------------------------ + + /** + * Get the lowest recorded value level in the histogram. If the histogram + * has no recorded values, the value returned is undefined. + * + * @return the Min value recorded in the histogram + */ + i64 GetMin() const; + + /** + * Get the highest recorded value level in the histogram. If the histogram + * has no recorded values, the value returned is undefined. + * + * @return the Max value recorded in the histogram + */ + i64 GetMax() const; + + /** + * Get the computed mean value of all recorded values in the histogram + * + * @return the mean value (in value units) of the histogram data + */ + double GetMean() const; + + /** + * Get the computed standard deviation of all recorded values in the histogram + * + * @return the standard deviation (in value units) of the histogram data + */ + double GetStdDeviation() const; + + /** + * Get the value at a given percentile. + * Note that two values are "equivalent" in this statement if + * {@link THistogram::ValuesAreEquivalent} would return true. + * + * @param percentile The percentile for which to return the associated + * value + * @return The value that the given percentage of the overall recorded + * value entries in the histogram are either smaller than or + * equivalent to. When the percentile is 0.0, returns the value + * that all value entries in the histogram are either larger than + * or equivalent to. + */ + i64 GetValueAtPercentile(double percentile) const; + + /** + * Get the count of recorded values at a specific value (to within the + * histogram resolution at the value level). + * + * @param value The value for which to provide the recorded count + * @return The total count of values recorded in the histogram within the + * value range that is >= GetLowestEquivalentValue(value) and + * <= GetHighestEquivalentValue(value) + */ + i64 GetCountAtValue(i64 value) const; + + /** + * Determine if two values are equivalent with the histogram's resolution. + * Where "equivalent" means that value samples recorded for any two + * equivalent values are counted in a common total count. + * + * @param v1 first value to compare + * @param v2 second value to compare + * @return True if values are equivalent with the histogram's resolution. + */ + bool ValuesAreEqual(i64 v1, i64 v2) const; + + /** + * Get the lowest value that is equivalent to the given value within the + * histogram's resolution. Where "equivalent" means that value samples + * recorded for any two equivalent values are counted in a common total + * count. + * + * @param value The given value + * @return The lowest value that is equivalent to the given value within + * the histogram's resolution. + */ + i64 GetLowestEquivalentValue(i64 value) const; + + /** + * Get the highest value that is equivalent to the given value within the + * histogram's resolution. Where "equivalent" means that value samples + * recorded for any two equivalent values are counted in a common total + * count. + * + * @param value The given value + * @return The highest value that is equivalent to the given value within + * the histogram's resolution. + */ + i64 GetHighestEquivalentValue(i64 value) const; + + /** + * Get a value that lies in the middle (rounded up) of the range of values + * equivalent the given value. Where "equivalent" means that value samples + * recorded for any two equivalent values are counted in a common total + * count. + * + * @param value The given value + * @return The value lies in the middle (rounded up) of the range of values + * equivalent the given value. + */ + i64 GetMedianEquivalentValue(i64 value) const; + + // misc functions --------------------------------------------------------- + + /** + * Reset a histogram to zero - empty out a histogram and re-initialise it. + * If you want to re-use an existing histogram, but reset everything back + * to zero, this is the routine to use. + */ + void Reset(); + + const hdr_histogram* GetHdrHistogramImpl() const { + return Data_.Get(); + } + + private: + THolder<hdr_histogram> Data_; + IAllocator* Allocator_; + }; +} diff --git a/library/cpp/histogram/hdr/histogram_iter.cpp b/library/cpp/histogram/hdr/histogram_iter.cpp new file mode 100644 index 0000000000..d251fd5dd9 --- /dev/null +++ b/library/cpp/histogram/hdr/histogram_iter.cpp @@ -0,0 +1,146 @@ +#include "histogram_iter.h" + +#include <contrib/libs/hdr_histogram/src/hdr_histogram.h> + +namespace NHdr { + // TBaseHistogramIterator ----------------------------------------------------- + TBaseHistogramIterator::TBaseHistogramIterator() + : Iter_(new hdr_iter) + { + } + + TBaseHistogramIterator::~TBaseHistogramIterator() { + } + + bool TBaseHistogramIterator::Next() { + return hdr_iter_next(Iter_.Get()); + } + + i32 TBaseHistogramIterator::GetCountsIndex() const { + return Iter_->counts_index; + } + + i32 TBaseHistogramIterator::GetTotalCount() const { + return Iter_->total_count; + } + + i64 TBaseHistogramIterator::GetCount() const { + return Iter_->count; + } + + i64 TBaseHistogramIterator::GetCumulativeCount() const { + return Iter_->cumulative_count; + } + + i64 TBaseHistogramIterator::GetValue() const { + return Iter_->value; + } + + i64 TBaseHistogramIterator::GetHighestEquivalentValue() const { + return Iter_->highest_equivalent_value; + } + + i64 TBaseHistogramIterator::GetLowestEquivalentValue() const { + return Iter_->lowest_equivalent_value; + } + + i64 TBaseHistogramIterator::GetMedianEquivalentValue() const { + return Iter_->median_equivalent_value; + } + + i64 TBaseHistogramIterator::GetValueIteratedFrom() const { + return Iter_->value_iterated_from; + } + + i64 TBaseHistogramIterator::GetValueIteratedTo() const { + return Iter_->value_iterated_to; + } + + // TAllValuesIterator --------------------------------------------------------- + + TAllValuesIterator::TAllValuesIterator(const THistogram& histogram) { + hdr_iter_init(Iter_.Get(), histogram.GetHdrHistogramImpl()); + } + + // TRecordedValuesIterator ---------------------------------------------------- + + TRecordedValuesIterator::TRecordedValuesIterator(const THistogram& histogram) { + hdr_iter_recorded_init(Iter_.Get(), histogram.GetHdrHistogramImpl()); + } + + i64 TRecordedValuesIterator::GetCountAddedInThisIterationStep() const { + return Iter_->specifics.recorded.count_added_in_this_iteration_step; + } + + // TPercentileIterator -------------------------------------------------------- + + TPercentileIterator::TPercentileIterator( + const THistogram& histogram, ui32 ticksPerHalfDistance) { + hdr_iter_percentile_init( + Iter_.Get(), histogram.GetHdrHistogramImpl(), + ticksPerHalfDistance); + } + + i32 TPercentileIterator::GetTicketsPerHalfDistance() const { + return Iter_->specifics.percentiles.ticks_per_half_distance; + } + + double TPercentileIterator::GetPercentileToIterateTo() const { + return Iter_->specifics.percentiles.percentile_to_iterate_to; + } + + double TPercentileIterator::GetPercentile() const { + return Iter_->specifics.percentiles.percentile; + } + + // TLinearIterator ------------------------------------------------------------ + + TLinearIterator::TLinearIterator( + const THistogram& histogram, i64 valueUnitsPerBucket) { + hdr_iter_linear_init( + Iter_.Get(), histogram.GetHdrHistogramImpl(), valueUnitsPerBucket); + } + + i64 TLinearIterator::GetValueUnitsPerBucket() const { + return Iter_->specifics.linear.value_units_per_bucket; + } + + i64 TLinearIterator::GetCountAddedInThisIterationStep() const { + return Iter_->specifics.linear.count_added_in_this_iteration_step; + } + + i64 TLinearIterator::GetNextValueReportingLevel() const { + return Iter_->specifics.linear.next_value_reporting_level; + } + + i64 TLinearIterator::GetNextValueReportingLevelLowestEquivalent() const { + return Iter_->specifics.linear.next_value_reporting_level_lowest_equivalent; + } + + // TLogarithmicIterator ------------------------------------------------------- + + TLogarithmicIterator::TLogarithmicIterator( + const THistogram& histogram, i64 valueUnitsInFirstBucket, + double logBase) { + hdr_iter_log_init( + Iter_.Get(), histogram.GetHdrHistogramImpl(), + valueUnitsInFirstBucket, logBase); + } + + double TLogarithmicIterator::GetLogBase() const { + return Iter_->specifics.log.log_base; + } + + i64 TLogarithmicIterator::GetCountAddedInThisIterationStep() const { + return Iter_->specifics.log.count_added_in_this_iteration_step; + } + + i64 TLogarithmicIterator::GetNextValueReportingLevel() const { + return Iter_->specifics.log.next_value_reporting_level; + } + + i64 TLogarithmicIterator::GetNextValueReportingLevelLowestEquivalent() const { + return Iter_->specifics.log.next_value_reporting_level_lowest_equivalent; + } + +} diff --git a/library/cpp/histogram/hdr/histogram_iter.h b/library/cpp/histogram/hdr/histogram_iter.h new file mode 100644 index 0000000000..adfc1616e3 --- /dev/null +++ b/library/cpp/histogram/hdr/histogram_iter.h @@ -0,0 +1,231 @@ +#pragma once + +#include "histogram.h" + +struct hdr_iter; + +namespace NHdr { + /** + * Used for iterating through histogram values. + */ + class TBaseHistogramIterator { + public: + /** + * Iterate to the next value for the iterator. If there are no more values + * available return false. + * + * @return 'false' if there are no values remaining for this iterator. + */ + bool Next(); + + /** + * @return Raw index into the counts array. + */ + i32 GetCountsIndex() const; + + /** + * @return Snapshot of the length at the time the iterator is created. + */ + i32 GetTotalCount() const; + + /** + * @return Value directly from array for the current countsIndex. + */ + i64 GetCount() const; + + /** + * @return Sum of all of the counts up to and including the count at + * this index. + */ + i64 GetCumulativeCount() const; + + /** + * @return The current value based on countsIndex. + */ + i64 GetValue() const; + + /** + * @return The highest value that is equivalent to the current value + * within the histogram's resolution. + */ + i64 GetHighestEquivalentValue() const; + + /** + * @return The lowest value that is equivalent to the current value + * within the histogram's resolution. + */ + i64 GetLowestEquivalentValue() const; + + /** + * @return The value lies in the middle (rounded up) of the range of + * values equivalent the current value. + */ + i64 GetMedianEquivalentValue() const; + + /** + * @return The actual value level that was iterated from by the iterator. + */ + i64 GetValueIteratedFrom() const; + + /** + * @return The actual value level that was iterated to by the iterator. + */ + i64 GetValueIteratedTo() const; + + protected: + // must not be instantiated directly + TBaseHistogramIterator(); + ~TBaseHistogramIterator(); + + protected: + THolder<hdr_iter> Iter_; + }; + + /** + * Used for iterating through histogram values using the finest granularity + * steps supported by the underlying representation. The iteration steps + * through all possible unit value levels, regardless of whether or not there + * were recorded values for that value level, and terminates when all recorded + * histogram values are exhausted. + */ + class TAllValuesIterator: public TBaseHistogramIterator { + public: + /** + * @param histogram The histogram this iterator will operate on + */ + explicit TAllValuesIterator(const THistogram& histogram); + }; + + /** + * Used for iterating through all recorded histogram values using the finest + * granularity steps supported by the underlying representation. The iteration + * steps through all non-zero recorded value counts, and terminates when all + * recorded histogram values are exhausted. + */ + class TRecordedValuesIterator: public TBaseHistogramIterator { + public: + /** + * @param histogram The histogram this iterator will operate on + */ + explicit TRecordedValuesIterator(const THistogram& histogram); + + /** + * @return The count of recorded values in the histogram that were added + * to the totalCount as a result on this iteration step. Since + * multiple iteration steps may occur with overlapping equivalent + * value ranges, the count may be lower than the count found at + * the value (e.g. multiple linear steps or percentile levels can + * occur within a single equivalent value range). + */ + i64 GetCountAddedInThisIterationStep() const; + }; + + /** + * Used for iterating through histogram values according to percentile levels. + * The iteration is performed in steps that start at 0% and reduce their + * distance to 100% according to the <i>percentileTicksPerHalfDistance</i> + * parameter, ultimately reaching 100% when all recorded histogram + * values are exhausted. + */ + class TPercentileIterator: public TBaseHistogramIterator { + public: + /** + * @param histogram The histogram this iterator will operate on + * @param ticksPerHalfDistance The number of equal-sized iteration steps + * per half-distance to 100%. + */ + TPercentileIterator(const THistogram& histogram, ui32 ticksPerHalfDistance); + + /** + * @return The number of equal-sized iteration steps per half-distance + * to 100%. + */ + i32 GetTicketsPerHalfDistance() const; + + double GetPercentileToIterateTo() const; + + /** + * @return The percentile of recorded values in the histogram at values + * equal or smaller than valueIteratedTo. + * + */ + double GetPercentile() const; + }; + + /** + * Used for iterating through histogram values in linear steps. The iteration + * is performed in steps of <i>valueUnitsPerBucket</i> in size, terminating + * when all recorded histogram values are exhausted. Note that each iteration + * "bucket" includes values up to and including the next bucket boundary value. + */ + class TLinearIterator: public TBaseHistogramIterator { + public: + /** + * @param histogram The histogram this iterator will operate on + * @param valueUnitsPerBucket The size (in value units) of each bucket + * iteration. + */ + TLinearIterator(const THistogram& histogram, i64 valueUnitsPerBucket); + + /** + * @return The size (in value units) of each bucket iteration. + */ + i64 GetValueUnitsPerBucket() const; + + /** + * @return The count of recorded values in the histogram that were added + * to the totalCount as a result on this iteration step. Since + * multiple iteration steps may occur with overlapping equivalent + * value ranges, the count may be lower than the count found at + * the value (e.g. multiple linear steps or percentile levels can + * occur within a single equivalent value range). + */ + i64 GetCountAddedInThisIterationStep() const; + + i64 GetNextValueReportingLevel() const; + + i64 GetNextValueReportingLevelLowestEquivalent() const; + }; + + /** + * Used for iterating through histogram values in logarithmically increasing + * levels. The iteration is performed in steps that start at + * <i>valueUnitsInFirstBucket</i> and increase exponentially according to + * <i>logBase</i>, terminating when all recorded histogram values are + * exhausted. Note that each iteration "bucket" includes values up to and + * including the next bucket boundary value. + */ + class TLogarithmicIterator: public TBaseHistogramIterator { + public: + /** + * @param histogram The histogram this iterator will operate on + * @param valueUnitsInFirstBucket the size (in value units) of the first + * value bucket step + * @param logBase the multiplier by which the bucket size is expanded in + * each iteration step. + */ + TLogarithmicIterator( + const THistogram& histogram, i64 valueUnitsInFirstBucket, + double logBase); + + /** + * @return The multiplier by which the bucket size is expanded in each + * iteration step. + */ + double GetLogBase() const; + + /** + * @return The count of recorded values in the histogram that were added + * to the totalCount as a result on this iteration step. Since + * multiple iteration steps may occur with overlapping equivalent + * value ranges, the count may be lower than the count found at + * the value (e.g. multiple linear steps or percentile levels can + * occur within a single equivalent value range). + */ + i64 GetCountAddedInThisIterationStep() const; + + i64 GetNextValueReportingLevel() const; + + i64 GetNextValueReportingLevelLowestEquivalent() const; + }; +} diff --git a/library/cpp/histogram/hdr/histogram_iter_ut.cpp b/library/cpp/histogram/hdr/histogram_iter_ut.cpp new file mode 100644 index 0000000000..9c291a2547 --- /dev/null +++ b/library/cpp/histogram/hdr/histogram_iter_ut.cpp @@ -0,0 +1,210 @@ +#include "histogram_iter.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NHdr; + +Y_UNIT_TEST_SUITE(THistogramIterTest) { + Y_UNIT_TEST(RecordedValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValues(1000, 1000)); + UNIT_ASSERT(h.RecordValue(1000 * 1000)); + + int index = 0; + TRecordedValuesIterator it(h); + + while (it.Next()) { + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1000); + UNIT_ASSERT_EQUAL(countInStep, 1000); + } else if (index == 1) { + UNIT_ASSERT_EQUAL(countInBucket, 1); + UNIT_ASSERT_EQUAL(countInStep, 1); + } else { + UNIT_FAIL("unexpected index value: " << index); + } + + index++; + } + + UNIT_ASSERT_EQUAL(index, 2); + } + + Y_UNIT_TEST(CorrectedRecordedValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValuesWithExpectedInterval(1000, 1000, 1000)); + UNIT_ASSERT(h.RecordValueWithExpectedInterval(1000 * 1000, 1000)); + + int index = 0; + i64 totalCount = 0; + TRecordedValuesIterator it(h); + + while (it.Next()) { + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1001); + UNIT_ASSERT_EQUAL(countInStep, 1001); + } else { + UNIT_ASSERT(countInBucket >= 1); + UNIT_ASSERT(countInStep >= 1); + } + index++; + totalCount += countInStep; + } + + UNIT_ASSERT_EQUAL(index, 1000); + UNIT_ASSERT_EQUAL(totalCount, 2000); + } + + Y_UNIT_TEST(LinearValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValues(1000, 1000)); + UNIT_ASSERT(h.RecordValue(1000 * 1000)); + + int index = 0; + TLinearIterator it(h, 1000); + + while (it.Next()) { + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1000); + UNIT_ASSERT_EQUAL(countInStep, 1000); + } else if (index == 999) { + UNIT_ASSERT_EQUAL(countInBucket, 1); + UNIT_ASSERT_EQUAL(countInStep, 1); + } else { + UNIT_ASSERT_EQUAL(countInBucket, 0); + UNIT_ASSERT_EQUAL(countInStep, 0); + } + + index++; + } + + UNIT_ASSERT_EQUAL(index, 1000); + } + + Y_UNIT_TEST(CorrectLinearValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValuesWithExpectedInterval(1000, 1000, 1000)); + UNIT_ASSERT(h.RecordValueWithExpectedInterval(1000 * 1000, 1000)); + + int index = 0; + i64 totalCount = 0; + TLinearIterator it(h, 1000); + + while (it.Next()) { + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1001); + UNIT_ASSERT_EQUAL(countInStep, 1001); + } else { + UNIT_ASSERT_EQUAL(countInBucket, 1); + UNIT_ASSERT_EQUAL(countInStep, 1); + } + + index++; + totalCount += countInStep; + } + + UNIT_ASSERT_EQUAL(index, 1000); + UNIT_ASSERT_EQUAL(totalCount, 2000); + } + + Y_UNIT_TEST(LogarithmicValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValues(1000, 1000)); + UNIT_ASSERT(h.RecordValue(1000 * 1000)); + + int index = 0; + i64 expectedValue = 1000; + TLogarithmicIterator it(h, 1000, 2.0); + + while (it.Next()) { + i64 value = it.GetValue(); + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + + UNIT_ASSERT_EQUAL(value, expectedValue); + + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1000); + UNIT_ASSERT_EQUAL(countInStep, 1000); + } else if (index == 10) { + UNIT_ASSERT_EQUAL(countInBucket, 0); + UNIT_ASSERT_EQUAL(countInStep, 1); + } else { + UNIT_ASSERT_EQUAL(countInBucket, 0); + UNIT_ASSERT_EQUAL(countInStep, 0); + } + + index++; + expectedValue *= 2; + } + + UNIT_ASSERT_EQUAL(index, 11); + } + + Y_UNIT_TEST(CorrectedLogarithmicValues) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValuesWithExpectedInterval(1000, 1000, 1000)); + UNIT_ASSERT(h.RecordValueWithExpectedInterval(1000 * 1000, 1000)); + + int index = 0; + i64 totalCount = 0; + i64 expectedValue = 1000; + TLogarithmicIterator it(h, 1000, 2.0); + + while (it.Next()) { + i64 value = it.GetValue(); + i64 countInBucket = it.GetCount(); + i64 countInStep = it.GetCountAddedInThisIterationStep(); + + UNIT_ASSERT_EQUAL(value, expectedValue); + + if (index == 0) { + UNIT_ASSERT_EQUAL(countInBucket, 1001); + UNIT_ASSERT_EQUAL(countInStep, 1001); + } + + index++; + totalCount += countInStep; + expectedValue *= 2; + } + + UNIT_ASSERT_EQUAL(index, 11); + UNIT_ASSERT_EQUAL(totalCount, 2000); + } + + Y_UNIT_TEST(LinearIterBucketsCorrectly) { + THistogram h(255, 2); + UNIT_ASSERT(h.RecordValue(193)); + UNIT_ASSERT(h.RecordValue(255)); + UNIT_ASSERT(h.RecordValue(0)); + UNIT_ASSERT(h.RecordValue(1)); + UNIT_ASSERT(h.RecordValue(64)); + UNIT_ASSERT(h.RecordValue(128)); + + int index = 0; + i64 totalCount = 0; + TLinearIterator it(h, 64); + + while (it.Next()) { + if (index == 0) { + // change after iterator was created + UNIT_ASSERT(h.RecordValue(2)); + } + + index++; + totalCount += it.GetCountAddedInThisIterationStep(); + } + + UNIT_ASSERT_EQUAL(index, 4); + UNIT_ASSERT_EQUAL(totalCount, 6); + } +} diff --git a/library/cpp/histogram/hdr/histogram_ut.cpp b/library/cpp/histogram/hdr/histogram_ut.cpp new file mode 100644 index 0000000000..4841b76e71 --- /dev/null +++ b/library/cpp/histogram/hdr/histogram_ut.cpp @@ -0,0 +1,178 @@ +#include "histogram.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/cast.h> + +using namespace NHdr; + +void LoadData(THistogram* h1, THistogram* h2) { + for (int i = 0; i < 1000; i++) { + UNIT_ASSERT(h1->RecordValue(1000)); + UNIT_ASSERT(h2->RecordValueWithExpectedInterval(1000, 1000)); + } + + UNIT_ASSERT(h1->RecordValue(1000 * 1000)); + UNIT_ASSERT(h2->RecordValueWithExpectedInterval(1000 * 1000, 1000)); +} + +Y_UNIT_TEST_SUITE(THistogramTest) { + Y_UNIT_TEST(Creation) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT_EQUAL(h.GetMemorySize(), 188512); + UNIT_ASSERT_EQUAL(h.GetCountsLen(), 23552); + } + + Y_UNIT_TEST(CreateWithLargeValues) { + THistogram h(20L * 1000 * 1000, 100L * 1000 * 1000, 5); + UNIT_ASSERT(h.RecordValue(100L * 1000 * 1000)); + UNIT_ASSERT(h.RecordValue(20L * 1000 * 1000)); + UNIT_ASSERT(h.RecordValue(30L * 1000 * 1000)); + + i64 v50 = h.GetValueAtPercentile(50.0); + i64 v8333 = h.GetValueAtPercentile(83.33); + i64 v8334 = h.GetValueAtPercentile(83.34); + i64 v99 = h.GetValueAtPercentile(99.0); + + UNIT_ASSERT_EQUAL(v50, 33554431); + UNIT_ASSERT_EQUAL(v8333, 33554431); + UNIT_ASSERT_EQUAL(v8334, 100663295); + UNIT_ASSERT_EQUAL(v99, 100663295); + + UNIT_ASSERT(h.ValuesAreEqual(v50, 20L * 1000 * 1000)); + UNIT_ASSERT(h.ValuesAreEqual(v8333, 30L * 1000 * 1000)); + UNIT_ASSERT(h.ValuesAreEqual(v8334, 100L * 1000 * 1000)); + UNIT_ASSERT(h.ValuesAreEqual(v99, 100L * 1000 * 1000)); + } + + Y_UNIT_TEST(InvalidSignificantValueDigits) { + UNIT_ASSERT_EXCEPTION(THistogram(1000, -1), yexception); + UNIT_ASSERT_EXCEPTION(THistogram(1000, 0), yexception); + UNIT_ASSERT_EXCEPTION(THistogram(1000, 6), yexception); + } + + Y_UNIT_TEST(InvalidLowestDiscernibleValue) { + UNIT_ASSERT_EXCEPTION(THistogram(0, 100, 3), yexception); + UNIT_ASSERT_EXCEPTION(THistogram(110, 100, 3), yexception); + } + + Y_UNIT_TEST(TotalCount) { + i64 oneHour = SafeIntegerCast<i64>(TDuration::Hours(1).MicroSeconds()); + THistogram h1(oneHour, 3); + THistogram h2(oneHour, 3); + LoadData(&h1, &h2); + + UNIT_ASSERT_EQUAL(h1.GetTotalCount(), 1001); + UNIT_ASSERT_EQUAL(h2.GetTotalCount(), 2000); + } + + Y_UNIT_TEST(StatsValues) { + i64 oneHour = SafeIntegerCast<i64>(TDuration::Hours(1).MicroSeconds()); + THistogram h1(oneHour, 3); + THistogram h2(oneHour, 3); + LoadData(&h1, &h2); + + // h1 - histogram without correction + { + UNIT_ASSERT_EQUAL(h1.GetMin(), 1000); + UNIT_ASSERT_EQUAL(h1.GetMax(), 1000447); + UNIT_ASSERT(h1.ValuesAreEqual(h1.GetMax(), 1000 * 1000)); + + // >>> numpy.mean([1000 for i in range(1000)] + [1000000]) + // 1998.0019980019979 + UNIT_ASSERT_DOUBLES_EQUAL(h1.GetMean(), 1998, 0.5); + + // >>> numpy.std([1000 for i in range(1000)] + [1000000]) + // 31559.59423085126 + UNIT_ASSERT_DOUBLES_EQUAL(h1.GetStdDeviation(), 31559, 10); + } + + // h2 - histogram with correction of co-ordinated omission + { + UNIT_ASSERT_EQUAL(h2.GetMin(), 1000); + UNIT_ASSERT_EQUAL(h2.GetMax(), 1000447); + UNIT_ASSERT(h2.ValuesAreEqual(h1.GetMax(), 1000 * 1000)); + UNIT_ASSERT_DOUBLES_EQUAL(h2.GetMean(), 250752, 0.5); + UNIT_ASSERT_DOUBLES_EQUAL(h2.GetStdDeviation(), 322557, 0.5); + } + } + + Y_UNIT_TEST(Percentiles) { + i64 oneHour = SafeIntegerCast<i64>(TDuration::Hours(1).MicroSeconds()); + THistogram h1(oneHour, 3); + THistogram h2(oneHour, 3); + LoadData(&h1, &h2); + + // >>> a = [1000 for i in range(1000)] + [1000000] + // >>> [(p, numpy.percentile(a, p)) for p in [50, 90, 99, 99.99, 99.999, 100]] + // [(50, 1000.0), (90, 1000.0), (99, 1000.0), (99.99, 900099.99999986368), (99.999, 990009.99999989558), (100, 1000000.0)] + + // h1 - histogram without correction + { + i64 v50 = h1.GetValueAtPercentile(50); + i64 v90 = h1.GetValueAtPercentile(90); + i64 v99 = h1.GetValueAtPercentile(99); + i64 v9999 = h1.GetValueAtPercentile(99.99); + i64 v99999 = h1.GetValueAtPercentile(99.999); + i64 v100 = h1.GetValueAtPercentile(100); + + UNIT_ASSERT_EQUAL(v50, 1000); + UNIT_ASSERT_EQUAL(v90, 1000); + UNIT_ASSERT_EQUAL(v99, 1000); + UNIT_ASSERT_EQUAL(v9999, 1000447); + UNIT_ASSERT_EQUAL(v99999, 1000447); + UNIT_ASSERT_EQUAL(v100, 1000447); + + UNIT_ASSERT(h1.ValuesAreEqual(v50, 1000)); + UNIT_ASSERT(h1.ValuesAreEqual(v90, 1000)); + UNIT_ASSERT(h1.ValuesAreEqual(v99, 1000)); + UNIT_ASSERT(h1.ValuesAreEqual(v9999, 1000 * 1000)); + UNIT_ASSERT(h1.ValuesAreEqual(v99999, 1000 * 1000)); + UNIT_ASSERT(h1.ValuesAreEqual(v100, 1000 * 1000)); + } + + // h2 - histogram with correction of co-ordinated omission + { + i64 v50 = h2.GetValueAtPercentile(50); + i64 v90 = h2.GetValueAtPercentile(90); + i64 v99 = h2.GetValueAtPercentile(99); + i64 v9999 = h2.GetValueAtPercentile(99.99); + i64 v99999 = h2.GetValueAtPercentile(99.999); + i64 v100 = h2.GetValueAtPercentile(100); + + UNIT_ASSERT_EQUAL(v50, 1000); + UNIT_ASSERT_EQUAL(v90, 800255); + UNIT_ASSERT_EQUAL(v99, 980479); + UNIT_ASSERT_EQUAL(v9999, 1000447); + UNIT_ASSERT_EQUAL(v99999, 1000447); + UNIT_ASSERT_EQUAL(v100, 1000447); + + UNIT_ASSERT(h2.ValuesAreEqual(v50, 1000)); + UNIT_ASSERT(h2.ValuesAreEqual(v90, 800 * 1000)); + UNIT_ASSERT(h2.ValuesAreEqual(v99, 980 * 1000)); + UNIT_ASSERT(h2.ValuesAreEqual(v9999, 1000 * 1000)); + UNIT_ASSERT(h2.ValuesAreEqual(v99999, 1000 * 1000)); + UNIT_ASSERT(h2.ValuesAreEqual(v100, 1000 * 1000)); + } + } + + Y_UNIT_TEST(OutOfRangeValues) { + THistogram h(1000, 4); + UNIT_ASSERT(h.RecordValue(32767)); + UNIT_ASSERT(!h.RecordValue(32768)); + } + + Y_UNIT_TEST(Reset) { + THistogram h(TDuration::Hours(1).MicroSeconds(), 3); + UNIT_ASSERT(h.RecordValues(1000, 1000)); + UNIT_ASSERT(h.RecordValue(1000 * 1000)); + UNIT_ASSERT_EQUAL(h.GetTotalCount(), 1001); + + h.Reset(); + + UNIT_ASSERT_EQUAL(h.GetTotalCount(), 0); + UNIT_ASSERT_EQUAL(h.GetMin(), Max<i64>()); + UNIT_ASSERT_EQUAL(h.GetMax(), 0); + UNIT_ASSERT_EQUAL(h.GetValueAtPercentile(99.0), 0); + } +} diff --git a/library/cpp/histogram/hdr/ut/ya.make b/library/cpp/histogram/hdr/ut/ya.make new file mode 100644 index 0000000000..13ceb143c8 --- /dev/null +++ b/library/cpp/histogram/hdr/ut/ya.make @@ -0,0 +1,13 @@ +OWNER( + g:util + jamel +) + +UNITTEST_FOR(library/cpp/histogram/hdr) + +SRCS( + histogram_ut.cpp + histogram_iter_ut.cpp +) + +END() diff --git a/library/cpp/histogram/hdr/ya.make b/library/cpp/histogram/hdr/ya.make new file mode 100644 index 0000000000..885099608d --- /dev/null +++ b/library/cpp/histogram/hdr/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER( + g:util + jamel +) + +SRCS( + histogram.cpp + histogram_iter.cpp +) + +PEERDIR( + contrib/libs/hdr_histogram +) + +END() |