1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
/* MIT License
*
* Copyright (c) 2024 Brad House
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* SPDX-License-Identifier: MIT
*/
/* IMPLEMENTATION NOTES
* ====================
*
* With very little effort we should be able to determine fairly proper timeouts
* we can use based on prior query history. We track in order to be able to
* auto-scale when network conditions change (e.g. maybe there is a provider
* failover and timings change due to that). Apple appears to do this within
* their system resolver in MacOS. Obviously we should have a minimum, maximum,
* and initial value to make sure the algorithm doesn't somehow go off the
* rails.
*
* Values:
* - Minimum Timeout: 250ms (approximate RTT half-way around the globe)
* - Maximum Timeout: 5000ms (Recommended timeout in RFC 1123), can be reduced
* by ARES_OPT_MAXTIMEOUTMS, but otherwise the bound specified by the option
* caps the retry timeout.
* - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS
* - Average latency multiplier: 5x (a local DNS server returning a cached value
* will be quicker than if it needs to recurse so we need to account for this)
* - Minimum Count for Average: 3. This is the minimum number of queries we
* need to form an average for the bucket.
*
* Per-server buckets for tracking latency over time (these are ephemeral
* meaning they don't persist once a channel is destroyed). We record both the
* current timespan for the bucket and the immediate preceding timespan in case
* of roll-overs we can still maintain recent metrics for calculations:
* - 1 minute
* - 15 minutes
* - 1 hr
* - 1 day
* - since inception
*
* Each bucket would contain:
* - timestamp (divided by interval)
* - minimum latency
* - maximum latency
* - total time
* - count
* NOTE: average latency is (total time / count), we will calculate this
* dynamically when needed
*
* Basic algorithm for calculating timeout to use would be:
* - Scan from most recent bucket to least recent
* - Check timestamp and count of bucket, if the timestamp doesn't match the
* current time or the count is not at least the "Minimum Count for Average",
* check the bucket's previous timespan instead
* - If the previous timespan is also out of date or below the "Minimum Count
* for Average", continue to next bucket
* - If we reached the end with no bucket match, use "Initial Timeout"
* - If bucket is selected, take ("total time" / count) as Average latency,
* multiply by "Average Latency Multiplier", bound by "Minimum Timeout" and
* "Maximum Timeout"
* NOTE: The timeout calculated may not be the timeout used. If we are retrying
* the query on the same server another time, then it will use a larger value
*
* On each query reply where the response is legitimate (proper response or
* NXDOMAIN) and not something like a server error:
* - Cycle through each bucket in order
* - Check timestamp of bucket against current timestamp, if out of date
* overwrite previous entry with values, clear current values
* - Compare current minimum and maximum recorded latency against query time and
* adjust if necessary
* - Increment "count" by 1 and "total time" by the query time
*
* Other Notes:
* - This is always-on, the only user-configurable value is the initial
* timeout, which simply re-uses the existing option.
* - Minimum and Maximum latencies for a bucket are currently unused but are
* there in case we find a need for them in the future.
*/
#include "ares_private.h"
/*! Minimum timeout value, in milliseconds. Chosen due to it being approximately
 *  the RTT half-way around the world. The computed server timeout is raised to
 *  this value before the upper bound is applied. */
#define MIN_TIMEOUT_MS 250
/*! Multiplier applied to the average latency of a bucket to come up with an
 *  initial timeout */
#define AVG_TIMEOUT_MULTIPLIER 5
/*! Upper timeout bound, in milliseconds. Only used if channel->maxtimeout is
 *  not set (zero) */
#define MAX_TIMEOUT_MS 5000
/*! Minimum number of recorded queries a bucket timespan must hold before its
 *  average latency is used */
#define MIN_COUNT_FOR_AVERAGE 3
/*! Compute the timestamp key that identifies a timespan of a metrics bucket.
 *
 *  The key is `now->sec` divided by the length of the bucket's interval in
 *  seconds, so every time falling inside the same interval yields the same
 *  key.
 *
 *  \param[in] bucket       Bucket whose interval length should be used.
 *  \param[in] now          Current time; only the `sec` member is read.
 *  \param[in] is_previous  ARES_TRUE to obtain the key of the interval
 *                          immediately preceding the current one, ARES_FALSE
 *                          for the current interval.
 *  \return Timestamp key. ARES_METRIC_INCEPTION yields 1 for the current
 *          timespan and 0 for the previous one. 0 is also returned for the
 *          invalid ARES_METRIC_COUNT value, and when a previous interval is
 *          requested but `now->sec` does not exceed one interval length.
 */
static time_t ares_metric_timestamp(ares_server_bucket_t  bucket,
                                    const ares_timeval_t *now,
                                    ares_bool_t           is_previous)
{
  /* The default value only exists to silence a bogus MSVC warning */
  time_t interval = 1;

  switch (bucket) {
    case ARES_METRIC_COUNT:
      /* Not a real bucket */
      return 0;
    case ARES_METRIC_INCEPTION:
      /* Never rolls over, so the keys are fixed */
      if (is_previous) {
        return 0;
      }
      return 1;
    case ARES_METRIC_1DAY:
      interval = 24 * 60 * 60;
      break;
    case ARES_METRIC_1HOUR:
      interval = 60 * 60;
      break;
    case ARES_METRIC_15MINUTES:
      interval = 15 * 60;
      break;
    case ARES_METRIC_1MINUTE:
      interval = 60;
      break;
  }

  if (!is_previous) {
    return (time_t)(now->sec / interval);
  }

  /* No more than one interval's worth of seconds, so there is no preceding
   * interval to name */
  if (interval >= now->sec) {
    return 0;
  }

  return (time_t)((now->sec - interval) / interval);
}
/*! Record the latency of a completed query into every metrics bucket of the
 *  server that answered it.
 *
 *  Nothing is recorded unless `status` is ARES_SUCCESS, `server` is non-NULL,
 *  and the response code of `dnsrec` is NOERROR or NXDOMAIN.
 *
 *  \param[in]     query   Query that completed. Its `ts` member is diffed
 *                         against the current time to obtain the latency.
 *  \param[in,out] server  Server whose metrics are updated. May be NULL, in
 *                         which case nothing is recorded.
 *  \param[in]     status  Result status of the query.
 *  \param[in]     dnsrec  DNS response whose rcode is inspected.
 */
void ares_metrics_record(const ares_query_t *query, ares_server_t *server,
                         ares_status_t status, const ares_dns_record_t *dnsrec)
{
  ares_timeval_t       now;
  ares_timeval_t       tvdiff;
  unsigned int         query_ms;
  ares_dns_rcode_t     rcode;
  ares_server_bucket_t i;

  if (status != ARES_SUCCESS) {
    return;
  }

  if (server == NULL) {
    return;
  }

  ares__tvnow(&now);

  /* Only legitimate answers count towards latency */
  rcode = ares_dns_record_get_rcode(dnsrec);
  if (rcode != ARES_RCODE_NOERROR && rcode != ARES_RCODE_NXDOMAIN) {
    return;
  }

  ares__timeval_diff(&tvdiff, &query->ts, &now);
  query_ms = (unsigned int)((tvdiff.sec * 1000) + (tvdiff.usec / 1000));

  /* Zero is used as the "unset" marker for the minimum latency below, so a
   * sub-millisecond reply is rounded up to 1ms */
  if (query_ms == 0) {
    query_ms = 1;
  }

  /* Place in each bucket */
  for (i = 0; i < ARES_METRIC_COUNT; i++) {
    time_t ts = ares_metric_timestamp(i, &now, ARES_FALSE);

    /* Bucket rolled over: copy metrics to prev and clear current */
    if (ts != server->metrics[i].ts) {
      server->metrics[i].prev_ts          = server->metrics[i].ts;
      server->metrics[i].prev_total_ms    = server->metrics[i].total_ms;
      server->metrics[i].prev_total_count = server->metrics[i].total_count;
      server->metrics[i].ts               = ts;
      server->metrics[i].latency_min_ms   = 0;
      server->metrics[i].latency_max_ms   = 0;
      server->metrics[i].total_ms         = 0;
      server->metrics[i].total_count      = 0;
    }

    if (server->metrics[i].latency_min_ms == 0 ||
        server->metrics[i].latency_min_ms > query_ms) {
      server->metrics[i].latency_min_ms = query_ms;
    }

    /* A new maximum must update the maximum. Previously this branch wrote to
     * latency_min_ms, which corrupted the minimum and left the maximum at 0 */
    if (query_ms > server->metrics[i].latency_max_ms) {
      server->metrics[i].latency_max_ms = query_ms;
    }

    server->metrics[i].total_count++;
    server->metrics[i].total_ms += (ares_uint64_t)query_ms;
  }
}
/*! Determine the timeout, in milliseconds, to use for a query to a server
 *  based on the latency recorded in the server's metrics buckets.
 *
 *  Buckets are scanned in enumeration order. The first bucket whose current
 *  timespan (or, failing that, previous timespan) matches the expected
 *  timestamp and holds at least MIN_COUNT_FOR_AVERAGE samples provides the
 *  average latency, which is multiplied by AVG_TIMEOUT_MULTIPLIER. If no
 *  bucket qualifies, the channel's configured timeout is used. The result is
 *  then raised to MIN_TIMEOUT_MS if lower, and finally capped at
 *  channel->maxtimeout (or MAX_TIMEOUT_MS if that is zero).
 *
 *  \param[in] server  Server to evaluate; its `channel` and `metrics` members
 *                     are read.
 *  \param[in] now     Current time used to derive bucket timestamps.
 *  \return Timeout in milliseconds.
 */
size_t ares_metrics_server_timeout(const ares_server_t  *server,
                                   const ares_timeval_t *now)
{
  const ares_channel_t *channel    = server->channel;
  size_t                timeout_ms = 0;
  size_t                upper_ms;
  ares_server_bucket_t  i;

  for (i = 0; i < ARES_METRIC_COUNT; i++) {
    time_t cur_ts = ares_metric_timestamp(i, now, ARES_FALSE);

    if (cur_ts == server->metrics[i].ts &&
        server->metrics[i].total_count >= MIN_COUNT_FOR_AVERAGE) {
      /* Current timespan is valid and has enough samples: use its average */
      timeout_ms =
        (size_t)(server->metrics[i].total_ms / server->metrics[i].total_count);
    } else {
      /* Current timespan is stale or too sparse, try the preceding timespan */
      time_t old_ts = ares_metric_timestamp(i, now, ARES_TRUE);

      if (old_ts != server->metrics[i].prev_ts ||
          server->metrics[i].prev_total_count < MIN_COUNT_FOR_AVERAGE) {
        /* Nothing usable here, move on to the next bucket */
        continue;
      }

      timeout_ms = (size_t)(server->metrics[i].prev_total_ms /
                            server->metrics[i].prev_total_count);
    }

    /* Scale the average latency into a timeout and stop searching */
    timeout_ms *= AVG_TIMEOUT_MULTIPLIER;
    break;
  }

  /* No bucket produced a value, fall back to the channel's configured
   * timeout */
  if (timeout_ms == 0) {
    timeout_ms = channel->timeout;
  }

  /* Channel-specified maximum takes precedence over the built-in default */
  upper_ms = MAX_TIMEOUT_MS;
  if (channel->maxtimeout) {
    upper_ms = channel->maxtimeout;
  }

  /* Enforce the lower bound first, then the upper bound, so the upper bound
   * wins should it be smaller than the lower one */
  if (timeout_ms < MIN_TIMEOUT_MS) {
    timeout_ms = MIN_TIMEOUT_MS;
  }

  if (timeout_ms > upper_ms) {
    timeout_ms = upper_ms;
  }

  return timeout_ms;
}
|