aboutsummaryrefslogtreecommitdiffstats
path: root/ydb/library/yql/dq/actors/protos/dq_stats.proto
blob: f3f5f7ce868f029140ff24c800fb22857ddad0ef (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
// Schema language version: proto3 (singular scalar fields without `optional`
// have implicit presence, so zero values are indistinguishable from "unset").
syntax = "proto3";

// Protobuf package that scopes every type declared in this file.
package NYql.NDqProto;

// Enables arena allocation support in the generated C++ code.
option cc_enable_arenas = true;

// google.protobuf.Any is the type of the `Extra` field carried by most of the
// messages in this file.
import "google/protobuf/any.proto";

// Statistics mode selector.
// The messages in this file split their fields into "basic stats" and
// "profile stats" sections, mirroring the BASIC and PROFILE values below.
// Numeric values are spaced by 10; they are part of the wire contract and
// must not be changed.
enum EDqStatsMode {
    // Zero value, i.e. what a reader sees when the mode was never set.
    DQ_STATS_MODE_UNSPECIFIED = 0;
    // NOTE(review): presumably means "collect no stats" - confirm with the consumer.
    DQ_STATS_MODE_NONE = 10;
    // Per the comment on TDqComputeActorStats.Tasks, only the "basic" fields
    // are used in this mode.
    DQ_STATS_MODE_BASIC = 20;
    // NOTE(review): presumably the "profile stats" fields are filled in as well - confirm.
    DQ_STATS_MODE_PROFILE = 30;
}

// Counters for one async input buffer.
// Used as the element type of both TDqTaskStats.Sources and
// TDqTaskStats.InputTransforms.
message TDqAsyncInputBufferStats {
    // basic stats
    // NOTE(review): presumably the index of the task input this entry describes - confirm.
    uint64 InputIndex = 1;
    // Chunk / byte / row counters. NOTE(review): the exact definition of a
    // "chunk" and the difference between RowsIn and RowsOut are not stated in
    // this file - confirm against the producer.
    uint64 Chunks = 2;
    uint64 Bytes = 3;
    uint64 RowsIn = 4;
    uint64 RowsOut = 5;

    // profile stats
    // NOTE(review): unit is presumably bytes - confirm.
    uint64 MaxMemoryUsage = 6;
    uint32 ErrorsCount = 7;

    // NOTE(review): presumably bytes read from the external source, as opposed
    // to `Bytes` above - confirm.
    uint64 IngressBytes = 8;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Counters for one input channel. Element type of TDqTaskStats.InputChannels.
// The `Us` suffix follows the file-wide convention for microseconds (see
// TDqStageStats.DurationUs).
message TDqInputChannelStats {
    // basic stats
    // Identifier of the channel this entry describes.
    uint64 ChannelId = 1;
    // Chunk / byte / row counters. NOTE(review): the exact definition of a
    // "chunk" and the difference between RowsIn and RowsOut are not stated in
    // this file - confirm against the producer.
    uint64 Chunks = 2;
    uint64 Bytes = 3;
    uint64 RowsIn = 4;
    uint64 RowsOut = 5;

    // profile stats
    uint64 DeserializationTimeUs = 6;
    // NOTE(review): unit is presumably bytes - confirm.
    uint64 MaxMemoryUsage = 7;

    // NOTE(review): PollRequests / ResentMessages look like message-level
    // counters of the channel protocol; their precise meaning is not visible
    // here - confirm.
    uint32 PollRequests = 8;
    uint32 ResentMessages = 9;
    uint64 WaitTimeUs = 10;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Counters for one async output buffer. Element type of TDqTaskStats.Sinks.
// Field layout parallels TDqAsyncInputBufferStats (index, chunk/byte/row
// counters, memory, errors, external-traffic bytes).
message TDqAsyncOutputBufferStats {
    // basic stats
    // NOTE(review): presumably the index of the task output this entry describes - confirm.
    uint64 OutputIndex = 1;
    // Chunk / byte / row counters. NOTE(review): the exact definition of a
    // "chunk" and the difference between RowsIn and RowsOut are not stated in
    // this file - confirm against the producer.
    uint64 Chunks = 2;
    uint64 Bytes = 3;
    uint64 RowsIn = 4;
    uint64 RowsOut = 5;

    // profile stats
    // NOTE(review): unit is presumably bytes - confirm.
    uint64 MaxMemoryUsage = 6;
    uint32 ErrorsCount = 7;

    // NOTE(review): presumably bytes written to the external sink, as opposed
    // to `Bytes` above - confirm.
    uint64 EgressBytes = 8;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Counters for one output channel. Element type of TDqTaskStats.OutputChannels.
// The `Us` suffix follows the file-wide convention for microseconds (see
// TDqStageStats.DurationUs).
message TDqOutputChannelStats {
    // basic stats
    // Identifier of the channel this entry describes.
    uint64 ChannelId = 1;
    // Chunk / byte / row counters. NOTE(review): the exact definition of a
    // "chunk" and the difference between RowsIn and RowsOut are not stated in
    // this file - confirm against the producer.
    uint64 Chunks = 2;
    uint64 Bytes = 3;
    uint64 RowsIn = 4;
    uint64 RowsOut = 5;

    // profile stats
    uint64 SerializationTimeUs = 6;
    // NOTE(review): the next three are uint32 counters whose exact trigger
    // conditions are not visible in this file - confirm with the channel code.
    uint32 BlockedByCapacity = 7;
    uint32 ResentMessages = 8;
    uint32 NoDstActorId = 9;
    uint32 MaxRowsInMemory = 10;
    // NOTE(review): unit is presumably bytes - confirm.
    uint64 MaxMemoryUsage = 11;

    // Spilling counters (bytes, rows and blobs).
    // NOTE(review): where the data is spilled to is not stated here.
    uint64 SpilledBytes = 12;
    uint64 SpilledRows = 13;
    uint64 SpilledBlobs = 14;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Per-table read / write / erase counters.
// Element type of TDqTaskStats.Tables and TDqExecutionStats.Tables; the
// aggregated counterpart is TDqTableAggrStats, which uses the same field
// numbers for the corresponding counters.
message TDqTableStats {
    // Path of the table the counters below refer to.
    string TablePath = 1;

    // Row and byte counters, one pair per operation kind.
    uint64 ReadRows = 2;
    uint64 ReadBytes = 3;
    uint64 WriteRows = 4;
    uint64 WriteBytes = 5;
    uint64 EraseRows = 6;
    uint64 EraseBytes = 7;

    uint32 AffectedPartitions = 8; // TODO: move it to the Extra

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// One named statistic value. Element type of TDqTaskStats.MkqlStats
// ("stats from mkql").
message TDqMkqlStat {
    // Name of the statistic.
    string Name = 1;
    // Signed value of the statistic (int64, unlike the unsigned counters used
    // elsewhere in this file).
    int64 Value = 2;
    // NOTE(review): presumably marks the value as a derivative/rate rather
    // than an absolute counter - confirm with the producer.
    bool Deriv = 3;
}

// Named byte counter. Element type of TDqTaskStats.Ingress and
// TDqTaskStats.Egress.
message TDqDataProviderStats {
    // NOTE(review): presumably the data provider's name, judging by the
    // message name - confirm.
    string Name = 1;
    // Byte count attributed to `Name`.
    uint64 Bytes = 2;
}

// Statistics of a single task. Element type of TDqComputeActorStats.Tasks.
//
// Field numbers do not follow declaration order (15 is declared between 3 and
// 4, 102 after 105, 106 after 110). The numbers are the wire identifiers, so
// they must never be renumbered to "tidy up".
// NOTE(review): numbers 11, 12 and 101 are neither used nor reserved; if they
// belonged to removed fields they should be added to the `reserved` list.
message TDqTaskStats {
    // Field number 154 is reserved and must not be assigned to a new field.
    reserved 154;

    // basic stats
    uint64 TaskId = 1;
    uint32 StageId = 2;
    uint64 CpuTimeUs = 3;        // total cpu time (build & compute)
    uint64 SourceCpuTimeUs = 15; // time consumed in source
    uint64 FirstRowTimeMs = 4;   // first row time, timestamp in millis
    uint64 FinishTimeMs = 5;     // task finish time, timestamp in millis
    uint64 InputRows = 6;
    uint64 InputBytes = 7;
    uint64 OutputRows = 8;
    uint64 OutputBytes = 9;

    // Per-table counters, one entry per table.
    repeated TDqTableStats Tables = 10;

    // Named byte counters for incoming (Ingress) and outgoing (Egress) data.
    repeated TDqDataProviderStats Ingress = 13;
    repeated TDqDataProviderStats Egress = 14;

    // profile stats
    uint64 BuildCpuTimeUs = 103;   // prepare task time: build computation graph, prepare parameters, ...
    uint64 WaitTimeUs = 104;       // total wait (input + output) wall time
    uint64 WaitOutputTimeUs = 105; // wait output wall time (any output: channels, sinks, ...)
    uint64 ComputeCpuTimeUs = 102; // compute time only
    uint64 PendingInputTimeUs = 107; // time waiting input data
    uint64 PendingOutputTimeUs = 108; // time waiting output data
    uint64 FinishTimeUs = 109; // time in finished state // ComputeCpuTimeUs + PendingInputTimeUs + PendingOutputTimeUs + FinishTimeUs == 100% (or == const in aggregated graphs for several stages/tasks)
    repeated TDqMkqlStat MkqlStats = 110; // stats from mkql

    // One histogram bucket.
    // NOTE(review): whether Bound is the upper or lower edge of the bucket,
    // and its unit, are not stated in this file - confirm with the producer.
    message THistBucket {
        double Bound = 1;
        uint64 Value = 2;
    }
    // Histogram built from THistBucket entries.
    // NOTE(review): presumably the distribution of compute CPU time per run - confirm.
    repeated THistBucket ComputeCpuTimeByRun = 106;

    // Per-endpoint breakdown: async inputs, input channels, async outputs,
    // output channels and input transforms.
    repeated TDqAsyncInputBufferStats Sources = 150;
    repeated TDqInputChannelStats InputChannels = 151;
    repeated TDqAsyncOutputBufferStats Sinks = 152;
    repeated TDqOutputChannelStats OutputChannels = 153;
    repeated TDqAsyncInputBufferStats InputTransforms = 155;
    // NOTE(review): presumably the host and node the task ran on - confirm.
    string HostName = 156;
    uint32 NodeId = 157;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 200;
}

// Statistics of one compute actor, including the stats of its tasks.
// Element type of TDqStageStats.ComputeActors.
// NOTE(review): number 101 is neither used nor reserved; if it belonged to a
// removed field it should be reserved.
message TDqComputeActorStats {
    // basic stats
    uint64 CpuTimeUs = 1;  // total cpu time: tasks cpu time + self cpu time
    uint64 DurationUs = 2; // compute actor duration, wall time (from FirstRowTime to FinishTime)
    repeated TDqTaskStats Tasks = 3; // in the BASIC_MODE only basic fields are used

    // profile stats
    uint64 MkqlMaxMemoryUsage = 102;       // MKQL allocations stats
    // NOTE(review): the two fields below presumably describe extra memory
    // requested beyond the initial allocation (total bytes and number of
    // requests) - confirm.
    uint64 MkqlExtraMemoryBytes = 103;
    uint32 MkqlExtraMemoryRequests = 104;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 200;
}

// Aggregate of a uint64 metric: minimum, maximum, sum and count.
// Used for the aggregated counters in TDqStageStats and TDqTableAggrStats and
// as the map value type in TExtraStats.
message TDqStatsAggr {
    uint64 Min = 1;
    uint64 Max = 2;
    uint64 Sum = 3;
    // NOTE(review): presumably the number of values aggregated - confirm.
    uint64 Cnt = 4;
}

// Open-ended set of aggregated metrics keyed by a string name.
// Not referenced by any other message in this file.
// NOTE(review): presumably packed into one of the `Extra` Any fields - confirm.
message TExtraStats {
    // Map from metric name to its aggregate. Map iteration order is undefined.
    map<string, TDqStatsAggr> Stats = 1;
}

// Minimum / maximum pair of a uint64 metric.
// Used by TDqStageStats for the FirstRowTimeMs and FinishTimeMs timestamps.
message TDqStatsMinMax {
    uint64 Min = 1;
    uint64 Max = 2;
}

// Aggregated per-table counters. Element type of TDqStageStats.Tables.
// Same fields and field numbers as TDqTableStats, but each row/byte counter
// is a TDqStatsAggr (min / max / sum / count) instead of a plain uint64.
message TDqTableAggrStats {
    // Path of the table the aggregates below refer to.
    string TablePath = 1;

    // Aggregated row and byte counters, one pair per operation kind.
    TDqStatsAggr ReadRows = 2;
    TDqStatsAggr ReadBytes = 3;
    TDqStatsAggr WriteRows = 4;
    TDqStatsAggr WriteBytes = 5;
    TDqStatsAggr EraseRows = 6;
    TDqStatsAggr EraseBytes = 7;

    // Plain (non-aggregated) count, unlike the counters above.
    uint32 AffectedPartitions = 8;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Aggregated stats for all tasks of the stage.
// Element type of TDqExecutionStats.Stages.
// NOTE(review): numbers 4 and 7 are neither used nor reserved; if they
// belonged to removed fields they should be reserved.
message TDqStageStats {
    uint32 StageId = 1;
    string StageGuid = 2;
    // NOTE(review): presumably the textual program of the stage - confirm.
    string Program = 3;

    uint32 TotalTasksCount = 5;
    uint32 FailedTasksCount = 6;

    // Aggregates (min / max / sum / count) of metrics that have same-named
    // fields in TDqTaskStats.
    TDqStatsAggr CpuTimeUs = 8;
    TDqStatsAggr InputRows = 9;
    TDqStatsAggr InputBytes = 10;
    TDqStatsAggr OutputRows = 11;
    TDqStatsAggr OutputBytes = 12;

    // Min / max of the per-task timestamps (milliseconds, per the Ms suffix
    // used in TDqTaskStats).
    TDqStatsMinMax FirstRowTimeMs = 13;
    TDqStatsMinMax FinishTimeMs = 14;
    uint64 DurationUs = 15; // microseconds from min(task_first_row_time) to max(task_finish_time)

    repeated TDqTableAggrStats Tables = 16; // is it required?

    repeated TDqComputeActorStats ComputeActors = 17; // more detailed stats

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}

// Graph execution stats: the top-level message of this file, containing
// per-stage stats (which in turn contain compute-actor and task stats).
// NOTE(review): numbers 5 and 7-9 are neither used nor reserved; if they
// belonged to removed fields they should be reserved.
message TDqExecutionStats {
    // basic stats
    uint64 CpuTimeUs = 1;  // total cpu time, executer + compute actors + ...
    uint64 DurationUs = 2; // execution wall time

    // Row and byte counters for the result.
    uint64 ResultRows = 3;
    uint64 ResultBytes = 4;

    // Per-table counters (non-aggregated form).
    repeated TDqTableStats Tables = 6;

    // profile stats
    uint64 ExecuterCpuTimeUs = 10; // executer self cpu time, microseconds
    uint64 StartTimeMs = 11;       // executer start timestamp, milliseconds
    uint64 FinishTimeMs = 12;      // executer finish timestamp, milliseconds
    uint64 FirstRowTimeMs = 13;    // first result row timestamp, milliseconds

    // One entry per stage.
    repeated TDqStageStats Stages = 14;
    // NOTE(review): presumably serialized transaction plans annotated with
    // stats; the string format is not defined in this file - confirm.
    repeated string TxPlansWithStats = 15;

    // Opaque extension payload; the packed message type is not fixed by this schema.
    google.protobuf.Any Extra = 100;
}