1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#pragma once
#include <yql/essentials/core/minsketch/count_min_sketch.h>
#include <library/cpp/json/json_reader.h>
#include <util/generic/vector.h>
#include <util/generic/hash.h>
#include <util/generic/string.h>
#include <optional>
#include <iostream>
namespace NYql {
// Tag stored in TOptimizerStatistics::Type.
// Kept as an unscoped enum: enumerators are used unqualified elsewhere in this
// header (e.g. as the default value of TOptimizerStatistics::Type).
// The values are spelled out and match the implicit numbering (0, 1, 2).
enum EStatisticsType : ui32 {
    BaseTable = 0,
    FilteredFactTable = 1,
    ManyManyJoin = 2
};
// Tag stored in TOptimizerStatistics::StorageType; NA is that field's default.
// The values are spelled out and match the implicit numbering (0, 1, 2).
enum EStorageType : ui32 {
    NA = 0,
    RowStorage = 1,
    ColumnStorage = 2
};
// Extension point for provider-specific statistics. A provider derives from
// this struct to attach its own data, which can then be used when deriving
// statistics for higher-level operators in the plan.
// The destructor is virtual so that derived objects can be destroyed through
// a pointer to this base.
struct IProviderStatistics {
    virtual ~IProviderStatistics() = default;
};
struct TColumnStatistics {
std::optional<double> NumUniqueVals;
std::optional<double> HyperLogLog;
std::shared_ptr<NKikimr::TCountMinSketch> CountMinSketch;
TString Type;
TColumnStatistics() {}
};
/**
 * Optimizer statistics record for the current operator in the plan.
 * It carries table-level figures (Nrows, Ncols, ByteSize, Selectivity),
 * optional per-column statistics, key and sort column lists, a storage tag,
 * provider-specific data and labels.
 * Cost is also included in the statistics, as it is always updated together
 * with them.
 */
struct TOptimizerStatistics {
    // Ref-counted list of column names, held through TIntrusivePtr (see KeyColumns).
    struct TKeyColumns : public TSimpleRefCount<TKeyColumns> {
        TVector<TString> Data;

        // Copies the given names. Intentionally left non-explicit to keep
        // existing call sites compiling.
        TKeyColumns(const TVector<TString>& vec)
            : Data(vec)
        {}
    };

    // Ref-counted pair of vectors: column names and aliases (see SortColumns).
    // NOTE(review): presumably Columns[i] pairs with Aliases[i] - confirm.
    struct TSortColumns : public TSimpleRefCount<TSortColumns> {
        TVector<TString> Columns;
        TVector<TString> Aliases;

        // Copies both vectors as given; no validation is done here.
        TSortColumns(const TVector<TString>& cols, const TVector<TString>& aliases)
            : Columns(cols)
            , Aliases(aliases)
        {}
    };

    // Ref-counted map from a string key to TColumnStatistics (see ColumnStatistics).
    // NOTE(review): the key is presumably the column name - confirm.
    struct TColumnStatMap : public TSimpleRefCount<TColumnStatMap> {
        THashMap<TString,TColumnStatistics> Data;

        // Starts with an empty map.
        TColumnStatMap() {}

        // Copies the given map.
        TColumnStatMap(const THashMap<TString,TColumnStatistics>& map)
            : Data(map)
        {}
    };

    // --- Data members. The declaration order is significant; do not reorder. ---

    EStatisticsType Type = BaseTable;

    // Numeric figures: all default to zero except Selectivity, which defaults to 1.0.
    double Nrows = 0;
    int Ncols = 0;
    double ByteSize = 0;
    double Cost = 0;
    double Selectivity = 1.0;

    // Optional attachments; each pointer is null by default.
    TIntrusivePtr<TKeyColumns> KeyColumns;
    TIntrusivePtr<TColumnStatMap> ColumnStatistics;

    EStorageType StorageType = EStorageType::NA;

    TIntrusivePtr<TSortColumns> SortColumns;

    // Provider-defined payload (see IProviderStatistics); null by default.
    std::shared_ptr<IProviderStatistics> Specific;

    // Null by default.
    std::shared_ptr<TVector<TString>> Labels = {};

    // --- Special members: all compiler-generated. ---

    TOptimizerStatistics() = default;
    TOptimizerStatistics(const TOptimizerStatistics&) = default;
    TOptimizerStatistics& operator=(const TOptimizerStatistics&) = default;
    TOptimizerStatistics(TOptimizerStatistics&&) = default;
    TOptimizerStatistics& operator=(TOptimizerStatistics&&) = default;

    // Field-wise constructor, defined out of line. Every argument after `type`
    // has a default. Selectivity, SortColumns and Labels are not constructor
    // parameters.
    TOptimizerStatistics(
        EStatisticsType type,
        double nrows = 0.0,
        int ncols = 0,
        double byteSize = 0.0,
        double cost = 0.0,
        TIntrusivePtr<TKeyColumns> keyColumns = {},
        TIntrusivePtr<TColumnStatMap> columnMap = {},
        EStorageType storageType = EStorageType::NA,
        std::shared_ptr<IProviderStatistics> specific = nullptr);

    // Defined out of line.
    // NOTE(review): which fields are accumulated is not visible in this header.
    TOptimizerStatistics& operator+=(const TOptimizerStatistics& other);

    // Defined out of line.
    // NOTE(review): the exact emptiness criterion is not visible in this header.
    bool Empty() const;

    // Textual output: stream insertion and a TString rendering, both defined
    // out of line.
    friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s);
    TString ToString() const;
};
// Declared here, defined elsewhere. Takes existing statistics `s`, a table
// path and a shared JSON value, and returns a shared_ptr to a
// TOptimizerStatistics object.
// NOTE(review): presumably the result is a copy of `s` with fields overridden
// by the entry for `tablePath` found in `stats` - confirm against the
// implementation, including how a null `stats` or a missing entry is handled.
std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats);
}
|