1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
|
#pragma once
#include "public.h"
#include <yt/yt/client/misc/config.h>
#include <yt/yt/core/ytree/yson_serializable.h>
namespace NYT::NChunkClient {
////////////////////////////////////////////////////////////////////////////////
//! Limits applied when fetching chunk specs.
/*!
 *  Derives virtually from NYTree::TYsonStruct, so a config combining several
 *  such bases shares a single TYsonStruct subobject.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TFetchChunkSpecConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Presumably the maximum number of chunks requested by a single fetch request -- TODO confirm.
int MaxChunksPerFetch;
//! Presumably the maximum number of chunks passed to a single locate request -- TODO confirm.
int MaxChunksPerLocateRequest;
REGISTER_YSON_STRUCT(TFetchChunkSpecConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TFetchChunkSpecConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a fetcher that sends fetch requests to nodes in rounds.
/*!
 *  Derives virtually from NYTree::TYsonStruct.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TFetcherConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Presumably the timeout of a single RPC request to a node -- TODO confirm.
TDuration NodeRpcTimeout;
//! If a node throttles a fetch request, it becomes banned for this period of time.
TDuration NodeBanDuration;
//! Time to sleep before the next fetching round if no requests were performed.
TDuration BackoffTime;
//! Presumably the maximum number of chunks requested from one node per fetch -- TODO confirm.
int MaxChunksPerNodeFetch;
//! Timeout when waiting for all replicas to appear in the given node directory.
TDuration NodeDirectorySynchronizationTimeout;
REGISTER_YSON_STRUCT(TFetcherConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TFetcherConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings controlling how blocks are reordered.
/*!
 *  Used as a virtual base by the writer configs in this file.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TBlockReordererConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Presumably turns block reordering on; judging by the note on #ShuffleBlocks,
//! the regular reordering groups blocks by column groups -- TODO confirm.
bool EnableBlockReordering;
//! Instead of grouping blocks by column groups, shuffle them.
//! Used only for testing purposes.
bool ShuffleBlocks;
REGISTER_YSON_STRUCT(TBlockReordererConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TBlockReordererConfig)
////////////////////////////////////////////////////////////////////////////////
//! Fetcher settings extended with a slice limit.
/*!
 *  Inherits every TFetcherConfig field (non-virtual inheritance).
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TChunkSliceFetcherConfig
: public TFetcherConfig
{
public:
//! Presumably the maximum number of slices requested by a single fetch -- TODO confirm.
int MaxSlicesPerFetch;
REGISTER_YSON_STRUCT(TChunkSliceFetcherConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TChunkSliceFetcherConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of an encoding writer.
/*!
 *  Combines TWorkloadConfig (declared outside this file) and
 *  TBlockReordererConfig as virtual bases.
 *  Register() is only declared here; keys and defaults live in its definition.
 *  NOTE(review): the field notes below are inferred from names -- confirm
 *  against the writer implementation.
 */
class TEncodingWriterConfig
: public virtual TWorkloadConfig
, public virtual TBlockReordererConfig
{
public:
//! Presumably the size (in bytes) of the window of data being encoded -- TODO confirm.
i64 EncodeWindowSize;
//! Presumably the compression ratio assumed before an actual one is measured -- TODO confirm.
double DefaultCompressionRatio;
//! Presumably makes the writer verify compressed blocks -- TODO confirm.
bool VerifyCompression;
//! Presumably makes the writer compute block checksums -- TODO confirm.
bool ComputeChecksum;
//! Presumably the number of blocks compressed concurrently -- TODO confirm.
int CompressionConcurrency;
REGISTER_YSON_STRUCT(TEncodingWriterConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TEncodingWriterConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a replication reader: RPC timeouts and hedging, retry/pass/backoff
//! policy, peer preferences and banning, caching, and a few testing knobs.
/*!
 *  Derives virtually from NYTree::TYsonStruct.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TReplicationReaderConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Timeout for a block request.
TDuration BlockRpcTimeout;
//! Delay before sending a hedged block request. If null then hedging is disabled.
//! NB: Hedging policy may be overridden via hedging manager.
std::optional<TDuration> BlockRpcHedgingDelay;
//! Same as above but for a LookupRows rpc.
std::optional<TDuration> LookupRpcHedgingDelay;
//! Whether to cancel the primary block request when the backup one is sent.
bool CancelPrimaryBlockRpcRequestOnHedging;
//! Same as above but for a LookupRows rpc.
bool CancelPrimaryLookupRpcRequestOnHedging;
//! Timeout for a lookup request.
TDuration LookupRpcTimeout;
//! Timeout for a meta request.
TDuration MetaRpcTimeout;
//! Delay before sending a hedged meta request. If null then hedging is disabled.
//! NB: Hedging policy may be overridden via hedging manager.
std::optional<TDuration> MetaRpcHedgingDelay;
//! Timeout for a queue size probing request.
TDuration ProbeRpcTimeout;
//! Maximum number of peers to poll for queue length each round.
int ProbePeerCount;
//! Maximum number of attempts to fetch new seeds.
int RetryCount;
//! Fail read session immediately if master reports no seeds for chunk.
bool FailOnNoSeeds;
//! Time to wait before making another pass with same seeds.
//! Increases exponentially with every pass, from #MinBackoffTime to #MaxBackoffTime
//! (presumably growing by a factor of #BackoffTimeMultiplier -- TODO confirm).
TDuration MinBackoffTime;
TDuration MaxBackoffTime;
double BackoffTimeMultiplier;
//! Maximum number of passes with same seeds.
int PassCount;
//! Enable fetching blocks from peers suggested by seeds.
bool FetchFromPeers;
//! Timeout after which a node forgets about the peer.
//! Only makes sense if the reader is equipped with peer descriptor.
TDuration PeerExpirationTimeout;
//! If |true| then fetched blocks are cached by the node.
bool PopulateCache;
//! If |true| then local data center replicas are unconditionally preferred to remote replicas.
bool PreferLocalDataCenter;
//! If |true| then local rack replicas are unconditionally preferred to remote replicas.
bool PreferLocalRack;
//! If |true| then local host replicas are unconditionally preferred to any other replicas.
bool PreferLocalHost;
//! If peer ban counter exceeds #MaxBanCount, peer is banned forever.
int MaxBanCount;
//! Factors to calculate peer load as linear combination of disk queue and net queue.
double NetQueueSizeFactor;
double DiskQueueSizeFactor;
//! If |true|, then workload descriptors are annotated with the read session start time
//! and are thus scheduled in FIFO order.
bool EnableWorkloadFifoScheduling;
//! Total retry timeout, helps when we are doing too many passes.
TDuration RetryTimeout;
//! Total session timeout (for ReadBlocks and GetMeta calls).
TDuration SessionTimeout;
//! Maximum number of passes within single retry for lookup request.
int LookupRequestPassCount;
//! Maximum number of retries for lookup request.
int LookupRequestRetryCount;
//! If |true| block cache will be accessed via asynchronous interface, if |false|
//! synchronous interface will be used.
bool UseAsyncBlockCache;
//! If |true| replication reader will try to fetch blocks from local block cache.
bool UseBlockCache;
//! New replicas will be located from master
//! if a node has been suspicious for at least this period (unless null).
std::optional<TDuration> SuspiciousNodeGracePeriod;
//! Is used to increase the interval between Locates
//! that are called for discarding seeds that are suspicious.
TDuration ProlongedDiscardSeedsDelay;
//! If |true| GetMeta() will be performed via provided ChunkMetaCache.
//! If ChunkMetaCache is nullptr or partition tag is specified, this option has no effect.
bool EnableChunkMetaCache;
//! If |true| reader will retain a set of peers that will be banned for every session.
bool BanPeersPermanently;
//! For testing purposes.
//! If |true| network throttlers will be applied even in case of requests to local host.
bool EnableLocalThrottling;
//! For testing purposes.
//! Unless null, reader will simulate failure of accessing chunk meta cache with such probability.
std::optional<double> ChunkMetaCacheFailureProbability;
//! Will open and read with DirectIO (unless already opened w/o DirectIO or disabled via location config).
//! NB: Right now is used only with data node lookup + chunk index.
bool UseDirectIO;
REGISTER_YSON_STRUCT(TReplicationReaderConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TReplicationReaderConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a block fetcher: prefetch window, request grouping and cache usage.
/*!
 *  Derives virtually from NYTree::TYsonStruct; used as a virtual base by the
 *  reader configs in this file.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TBlockFetcherConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Prefetch window size (in bytes).
i64 WindowSize;
//! Maximum amount of data to be transferred via a single RPC request.
i64 GroupSize;
//! If |true| block fetcher will try to fetch blocks from local uncompressed block cache.
bool UseUncompressedBlockCache;
REGISTER_YSON_STRUCT(TBlockFetcherConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TBlockFetcherConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of an erasure reader.
/*!
 *  Inherits all TReplicationReaderConfig fields (non-virtual base) and the
 *  TBlockFetcherConfig fields (virtual base).
 *  Register() is only declared here; keys and defaults live in its definition.
 *  NOTE(review): the field notes below are inferred from names -- confirm
 *  against the erasure reader implementation.
 */
class TErasureReaderConfig
: public TReplicationReaderConfig
, public virtual TBlockFetcherConfig
{
public:
//! Presumably enables automatic repair of unavailable parts while reading -- TODO confirm.
bool EnableAutoRepair;
//! Presumably a speed threshold (per second) below which an underlying
//! replication reader is considered slow -- TODO confirm units and usage.
double ReplicationReaderSpeedLimitPerSec;
//! Presumably how long a reader stays marked as slow -- TODO confirm.
TDuration SlowReaderExpirationTimeout;
//! Presumably a timeout applied to an underlying replication reader -- TODO confirm.
TDuration ReplicationReaderTimeout;
//! Presumably how long an underlying replication reader is treated as failed -- TODO confirm.
TDuration ReplicationReaderFailureTimeout;
REGISTER_YSON_STRUCT(TErasureReaderConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TErasureReaderConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a multi-chunk reader.
/*!
 *  Aggregates TErasureReaderConfig, TBlockFetcherConfig, TFetchChunkSpecConfig
 *  and TWorkloadConfig (the latter is declared outside this file), all as
 *  virtual bases. TBlockFetcherConfig is also a virtual base of
 *  TErasureReaderConfig, so only one TBlockFetcherConfig subobject exists.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TMultiChunkReaderConfig
: public virtual TErasureReaderConfig
, public virtual TBlockFetcherConfig
, public virtual TFetchChunkSpecConfig
, public virtual TWorkloadConfig
{
public:
//! Presumably the upper bound (in bytes) on data buffered by the reader -- TODO confirm.
i64 MaxBufferSize;
//! Presumably the maximum number of chunk readers working in parallel -- TODO confirm.
int MaxParallelReaders;
REGISTER_YSON_STRUCT(TMultiChunkReaderConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TMultiChunkReaderConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a replication writer: windowing, node RPC, replication factors,
//! node pinging and write-target allocation retries.
/*!
 *  Combines TWorkloadConfig (declared outside this file) and
 *  TBlockReordererConfig as virtual bases.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TReplicationWriterConfig
: public virtual TWorkloadConfig
, public virtual TBlockReordererConfig
{
public:
//! Maximum window size (in bytes).
i64 SendWindowSize;
//! Maximum group size (in bytes).
i64 GroupSize;
//! RPC requests timeout.
/*!
 * This timeout is especially useful for |PutBlocks| calls to ensure that
 * uploading is not stalled.
 */
TDuration NodeRpcTimeout;
//! Presumably the retrying channel settings used for node requests -- TODO confirm.
NRpc::TRetryingChannelConfigPtr NodeChannel;
//! Presumably the number of replicas the writer attempts to upload -- TODO confirm.
int UploadReplicationFactor;
//! Number of successfully written replicas referred to by #EnableEarlyFinish;
//! presumably also the minimum for an upload to count as successful -- TODO confirm.
int MinUploadReplicationFactor;
//! Optional; see #GetDirectUploadNodeCount.
std::optional<int> DirectUploadNodeCount;
//! Presumably prefers the local host as a write target -- TODO confirm.
bool PreferLocalHost;
//! Presumably bans nodes that failed during the upload -- TODO confirm.
bool BanFailedNodes;
//! Interval between consecutive pings to Data Nodes.
TDuration NodePingPeriod;
//! If |true| then written blocks are cached by the node.
bool PopulateCache;
//! If |true| then the chunk is fsynced to disk upon closing.
bool SyncOnClose;
//! Presumably makes the node write with DirectIO -- TODO confirm.
bool EnableDirectIO;
//! If |true| then the chunk is finished as soon as MinUploadReplicationFactor chunks are written.
bool EnableEarlyFinish;
//! Presumably the delay between attempts to allocate write targets -- TODO confirm.
TDuration AllocateWriteTargetsBackoffTime;
//! Presumably the maximum number of attempts to allocate write targets -- TODO confirm.
int AllocateWriteTargetsRetryCount;
//! Judging by the name, an artificial delay for tests; null presumably means none -- TODO confirm.
std::optional<TDuration> TestingDelay;
//! Defined out of line. Presumably returns the effective direct upload node count,
//! falling back to a computed value when #DirectUploadNodeCount is null -- TODO confirm.
int GetDirectUploadNodeCount();
REGISTER_YSON_STRUCT(TReplicationWriterConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TReplicationWriterConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of an erasure writer.
/*!
 *  Derives virtually from TBlockReordererConfig.
 *  Register() is only declared here; keys and defaults live in its definition.
 *  NOTE(review): the field notes below are inferred from names -- confirm
 *  against the erasure writer implementation.
 */
class TErasureWriterConfig
: public virtual TBlockReordererConfig
{
public:
//! Presumably the writer window size in bytes -- TODO confirm.
i64 WriterWindowSize;
//! Presumably the writer group size in bytes -- TODO confirm.
i64 WriterGroupSize;
// TODO(gritukan): Drop.
std::optional<i64> ErasureStripeSize;
//! Presumably the erasure encoding window size in bytes -- TODO confirm.
i64 ErasureWindowSize;
//! Presumably makes the writer keep checksums of the original (pre-encoding) blocks -- TODO confirm.
bool ErasureStoreOriginalBlockChecksums;
//! Presumably the desired size in bytes of a part of a segment -- TODO confirm.
i64 DesiredSegmentPartSize;
//! Presumably allows reallocating erasure target nodes on failure -- TODO confirm.
bool EnableErasureTargetNodeReallocation;
//! Presumably switches the writer to the striped erasure format -- TODO confirm.
bool EnableStripedErasure;
//! Presumably selects "effective" erasure codec variants -- TODO confirm.
bool UseEffectiveErasureCodecs;
REGISTER_YSON_STRUCT(TErasureWriterConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TErasureWriterConfig)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a multi-chunk writer.
/*!
 *  Aggregates TReplicationWriterConfig and TErasureWriterConfig. Both derive
 *  virtually from TBlockReordererConfig, so that base is shared rather than
 *  duplicated.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TMultiChunkWriterConfig
: public TReplicationWriterConfig
, public TErasureWriterConfig
{
public:
//! Presumably the target size of a produced chunk, in bytes -- TODO confirm.
i64 DesiredChunkSize;
//! Presumably the target data weight of a produced chunk -- TODO confirm.
i64 DesiredChunkWeight;
//! Presumably the upper bound on chunk meta size, in bytes -- TODO confirm.
i64 MaxMetaSize;
REGISTER_YSON_STRUCT(TMultiChunkWriterConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TMultiChunkWriterConfig)
////////////////////////////////////////////////////////////////////////////////
//! Writer options carrying memory tracking handles.
/*!
 *  NOTE(review): unlike the other structs in this header, this class derives
 *  from NYTree::TYsonStruct non-virtually and has no REGISTER_YSON_STRUCT,
 *  Register() or DEFINE_REFCOUNTED_TYPE here. Confirm this is intentional;
 *  presumably the trackers are set programmatically rather than parsed from YSON.
 */
class TMemoryTrackedWriterOptions
: public NYTree::TYsonStruct
{
public:
//! Handle to a memory usage tracker (type declared outside this file).
IMemoryUsageTrackerPtr MemoryTracker;
//! Handle to a memory reference tracker (type declared outside this file).
IMemoryReferenceTrackerPtr MemoryReferenceTracker;
};
////////////////////////////////////////////////////////////////////////////////
//! Options of an encoding writer.
/*!
 *  Derives virtually from TMemoryTrackedWriterOptions and so also carries its
 *  memory tracker handles.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TEncodingWriterOptions
: public virtual TMemoryTrackedWriterOptions
{
public:
//! Compression codec; presumably the one applied to written blocks -- TODO confirm.
NCompression::ECodec CompressionCodec;
//! Presumably marks produced chunks as Eden chunks -- TODO confirm.
bool ChunksEden;
//! Presumably makes the writer record the chunk creation time -- TODO confirm.
bool SetChunkCreationTime;
REGISTER_YSON_STRUCT(TEncodingWriterOptions);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TEncodingWriterOptions)
////////////////////////////////////////////////////////////////////////////////
//! Settings of a chunk fragment reader: cache expiration, peer load estimation,
//! RPC timeouts and hedging, retries and in-flight limits.
/*!
 *  Derives virtually from NYTree::TYsonStruct.
 *  Register() is only declared here; keys and defaults live in its definition.
 */
class TChunkFragmentReaderConfig
: public virtual NYTree::TYsonStruct
{
public:
//! Expiration timeout of corresponding sync expiring cache.
TDuration PeerInfoExpirationTimeout;
//! Minimal delay between sequential chunk replica locations.
TDuration SeedsExpirationTimeout;
//! Delay between background cache updates.
TDuration PeriodicUpdateDelay;
//! Factors to calculate peer load as linear combination of disk queue and net queue.
double NetQueueSizeFactor;
double DiskQueueSizeFactor;
//! RPC timeouts of ProbeChunkSet and GetChunkFragmentSet.
TDuration ProbeChunkSetRpcTimeout;
TDuration GetChunkFragmentSetRpcTimeout;
//! Delay before sending a hedged request. If null then hedging is disabled.
//! NB: This option may be overridden via hedging manager.
std::optional<TDuration> FragmentReadHedgingDelay;
//! Limit on retry count.
int RetryCountLimit;
//! Time between retries.
TDuration RetryBackoffTime;
//! Maximum time to serve fragments read request.
TDuration ReadTimeLimit;
//! A chunk that has not been accessed by the user for this long
//! stops being accessed within periodic updates and is then evicted via expiring cache logic.
TDuration ChunkInfoCacheExpirationTimeout;
//! New replicas will be located from master
//! if a node has been suspicious for at least this period (unless null).
std::optional<TDuration> SuspiciousNodeGracePeriod;
//! Will open and read with DirectIO (unless already opened w/o DirectIO or disabled via location config).
bool UseDirectIO;
//! Upper bound on length of simultaneously requested fragments within a reading session.
i64 MaxInflightFragmentLength;
//! Upper bound on count of simultaneously requested fragments within a reading session.
i64 MaxInflightFragmentCount;
REGISTER_YSON_STRUCT(TChunkFragmentReaderConfig);
static void Register(TRegistrar registrar);
};
DEFINE_REFCOUNTED_TYPE(TChunkFragmentReaderConfig)
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NChunkClient
|