summaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/rseq/per_cpu.cpp
blob: 90f1fa87fb38b8635975e2f687a432ffa15b6c65 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "per_cpu.h"

#if defined(__linux__)
#include <sched.h>
#include <unistd.h>

#include <cstdio>
#endif

#include <mutex>

namespace NYT::NRseq {

////////////////////////////////////////////////////////////////////////////////

namespace NDetail {

int ParsePossibleCpuCount(std::string_view list)
{
    // The list enumerates CPU id ranges (e.g. "0-3,8-11"); the highest id + 1 is nr_cpu_ids,
    // the exclusive upper bound for the rseq cpu_id. This differs from _SC_NPROCESSORS_CONF
    // (a popcount) on sparse topologies: "0-3,8-11" yields 12 here but a count of 8.
    int maxId = -1;
    for (size_t index = 0; index < list.size();) {
        if (list[index] < '0' || list[index] > '9') {
            ++index;
            continue;
        }
        int value = 0;
        while (index < list.size() && list[index] >= '0' && list[index] <= '9') {
            value = value * 10 + (list[index] - '0');
            ++index;
        }
        if (value > maxId) {
            maxId = value;
        }
    }
    return maxId >= 0 ? maxId + 1 : -1;
}

#if defined(__linux__)

//! Reads /sys/devices/system/cpu/possible and returns nr_cpu_ids, or -1 if it cannot be read.
static int TryReadPossibleCpuCount()
{
    auto* file = std::fopen("/sys/devices/system/cpu/possible", "re");
    if (!file) {
        return -1;
    }
    char buffer[256] = {};
    size_t size = std::fread(buffer, 1, sizeof(buffer) - 1, file);
    std::fclose(file);
    return ParsePossibleCpuCount(std::string_view(buffer, size));
}

#endif

// Published by GetCpuCount(); see the declaration in per_cpu-inl.h. Defaults to 0 so the
// fast path's bounds check sends every update to the safe atomic fallback until the size is
// known.
constinit int CpuCount = 0;

int GetFallbackCpuId()
{
#if defined(__linux__)
    int cpuId = ::sched_getcpu();
    if (cpuId < 0) {
        return 0;
    }
    int cpuCount = GetCpuCount();
    // Defensive: keep the index in range even if a CPU came online beyond the configured
    // count. On the fallback path slots are touched atomically, so a shared slot is safe.
    return cpuId < cpuCount ? cpuId : cpuId % cpuCount;
#else
    return 0;
#endif
}

} // namespace NDetail

////////////////////////////////////////////////////////////////////////////////

int GetCpuCount()
{
    static std::once_flag OnceFlag;
    std::call_once(OnceFlag, [] {
        int cpuCount = 1;
#if defined(__linux__)
        // The fast path indexes the slot array by the raw rseq cpu_id, so size to the highest
        // CPU id the kernel can report plus one (nr_cpu_ids), not merely the number of CPUs;
        // the possible-CPU bitmap gives this exact bound, and then every cpu_id is in range.
        if (int possible = NDetail::TryReadPossibleCpuCount(); possible > 0) {
            cpuCount = possible;
        } else {
            // Bitmap unavailable (e.g. /sys masked in a container): _SC_NPROCESSORS_CONF is a
            // count, not a cpu_id bound, so on a sparse topology it may be smaller than some
            // cpu_id. The fast path's bounds check then routes those CPUs to the clamped
            // atomic fallback -- still memory-safe, though a clamped slot may mix atomic and
            // non-atomic writes (at worst a lost counter update on such exotic setups).
            int configured = static_cast<int>(::sysconf(_SC_NPROCESSORS_CONF));
            cpuCount = configured > 0 ? configured : 1;
        }
#endif
        // Publish for the fast-path bounds check before any update can index the array.
        NDetail::CpuCount = cpuCount;
    });
    return NDetail::CpuCount;
}

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT::NRseq