aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/stockpile/stockpile_linux.cpp
blob: dafb4e9e8c821c9170dd06d7bd7d2482075c5082 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include "stockpile.h"

#include "library/cpp/yt/logging/logger.h"

#include <thread>
#include <mutex>

#include <sys/mman.h>

#include <util/system/thread.h>

#include <errno.h>
#include <string.h>

namespace NYT {

////////////////////////////////////////////////////////////////////////////////

// Logger object for this file, constructed with the "Stockpile" name.
// NOTE(review): presumably picked up implicitly by the YT_LOG_* macros used below -- confirm.
static const auto Logger = NLogging::TLogger("Stockpile");

// Advice value passed to madvise() by every strategy in this file.
// NOTE(review): this is not one of the standard madvise(2) advice constants; presumably
// it is only understood by kernels that carry a stockpile patch -- confirm.
constexpr int MADV_STOCKPILE = 0x59410004;

////////////////////////////////////////////////////////////////////////////////

namespace {

// Makes a single stockpile attempt with |bufferSize| and then sleeps for |period|,
// regardless of whether the attempt succeeded or how long it took.
void RunWithFixedBreaks(i64 bufferSize, TDuration period)
{
    // madvise(2) signals failure by returning -1 and storing the reason in errno
    // (it does not return a negated error code). Capture errno right after the call,
    // before anything else gets a chance to overwrite it.
    const int errorCode = ::madvise(nullptr, bufferSize, MADV_STOCKPILE) == 0 ? 0 : errno;
    YT_LOG_DEBUG_IF(errorCode != 0, "System call \"madvise\" returned %v", strerror(errorCode));
    Sleep(period);
}

// Makes a single stockpile attempt with |bufferSize| and then sleeps for whatever is
// left of |period| after subtracting the time the attempt itself took. If the attempt
// took |period| or longer, returns without sleeping.
void RunWithCappedLoad(i64 bufferSize, TDuration period)
{
    auto started = GetApproximateCpuInstant();

    // madvise(2) signals failure by returning -1 and storing the reason in errno
    // (it does not return a negated error code). Capture errno right after the call,
    // before anything else gets a chance to overwrite it.
    const int errorCode = ::madvise(nullptr, bufferSize, MADV_STOCKPILE) == 0 ? 0 : errno;
    YT_LOG_DEBUG_IF(errorCode != 0, "System call \"madvise\" returned %v", strerror(errorCode));

    // Logging is deliberately included in the measured interval, as before.
    auto duration = CpuDurationToDuration(GetApproximateCpuInstant() - started);

    if (duration < period) {
        Sleep(period - duration);
    }
}

// Makes one stockpile attempt and chooses the parameters for the next one.
//
// |adjustedBufferSize| and |adjustedPeriod| are the values returned by the previous
// call (the caller starts with options.BufferSize and options.Period). Returns the
// {buffer size, period} pair to pass to the next call:
//   - success: sleep options.Period; both values are reset to the configured ones;
//   - ENOMEM: halve the buffer and return at once while the half still covers at
//     least one page; otherwise sleep options.Period and keep the current size;
//   - EAGAIN / EINTR: sleep |adjustedPeriod|, then lengthen the period by options.Period;
//   - any other error: sleep options.Period; both values are reset to the configured ones.
std::pair<i64, TDuration> RunWithBackoffs(
    i64 adjustedBufferSize,
    TDuration adjustedPeriod,
    const TStockpileOptions& options,
    i64 pageSize)
{
    // madvise(2) signals failure by returning -1 and storing the reason in errno
    // (it does not return a negated error code). Negating the return value would only
    // ever yield 0 or 1, making the ENOMEM/EAGAIN/EINTR branches below unreachable.
    // Capture errno right after the call, before logging can overwrite it.
    const int errorCode = ::madvise(nullptr, adjustedBufferSize, MADV_STOCKPILE) == 0 ? 0 : errno;
    YT_LOG_DEBUG_IF(errorCode != 0, "System call \"madvise\" returned %v", strerror(errorCode));

    switch (errorCode) {
        case 0:
            Sleep(options.Period);
            return {options.BufferSize, options.Period};

        case ENOMEM:
            if (adjustedBufferSize / 2 >= pageSize) {
                // Immediately make an attempt to reclaim half as much.
                adjustedBufferSize = adjustedBufferSize / 2;
            } else {
                // Unless there is not even a single reclaimable page.
                Sleep(options.Period);
            }
            return {adjustedBufferSize, options.Period};

        case EAGAIN:
        case EINTR:
            Sleep(adjustedPeriod);
            return {options.BufferSize, adjustedPeriod + options.Period};

        default:
            Sleep(options.Period);
            return {options.BufferSize, options.Period};
    }
}

} // namespace

// Thread body: names the current thread "Stockpile" and keeps running the strategy
// selected by options.Strategy. With a null |shouldProceed| the loop never stops;
// otherwise it stops once |*shouldProceed| is observed to be false.
void RunStockpileThread(TStockpileOptions options, std::atomic<bool>* shouldProceed)
{
    TThread::SetCurrentThreadName("Stockpile");

    const i64 pageSize = sysconf(_SC_PAGESIZE);

    // Only the ProgressiveBackoff strategy carries state from one iteration to the next.
    auto backoffBufferSize = options.BufferSize;
    auto backoffPeriod = options.Period;

    for (;;) {
        if (shouldProceed && !shouldProceed->load()) {
            return;
        }

        switch (options.Strategy) {
            case EStockpileStrategy::FixedBreaks: {
                RunWithFixedBreaks(options.BufferSize, options.Period);
                break;
            }

            case EStockpileStrategy::FlooredLoad: {
                RunWithCappedLoad(options.BufferSize, options.Period);
                break;
            }

            case EStockpileStrategy::ProgressiveBackoff: {
                const auto next = RunWithBackoffs(backoffBufferSize, backoffPeriod, options, pageSize);
                backoffBufferSize = next.first;
                backoffPeriod = next.second;
                break;
            }

            default:
                YT_ABORT();
        }
    }
}

// Starts options.ThreadCount detached threads running RunStockpileThread with a null
// stop flag. Guarded by a function-local once_flag, so only the first call spawns
// threads; the options of later calls are not used.
void RunDetachedStockpileThreads(TStockpileOptions options)
{
    static std::once_flag SpawnOnce;

    auto spawnAll = [options = std::move(options)] {
        int spawned = 0;
        while (spawned < options.ThreadCount) {
            // Each thread receives its own copy of the options.
            std::thread worker(RunStockpileThread, options, nullptr);
            worker.detach();
            ++spawned;
        }
    };

    std::call_once(SpawnOnce, std::move(spawnAll));
}

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT