1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
|
#include "getNumberOfPhysicalCPUCores.h"
#include <filesystem>
#include "clickhouse_config.h"
#if defined(OS_LINUX)
# include <cmath>
# include <fstream>
#endif
#include <boost/algorithm/string/trim.hpp>
#include <boost/algorithm/string/split.hpp>
#include <base/range.h>
#include <thread>
#include <set>
#include <vector>
namespace
{
#if defined(OS_LINUX)
/// Reads a single integer from the beginning of the file `filename`.
/// Returns `default_value` if the file cannot be opened or does not start with a parsable integer.
int32_t readFrom(const std::filesystem::path & filename, int default_value)
{
    int parsed_value = default_value;
    std::ifstream input(filename);

    /// The extraction is attempted only when the file was opened successfully.
    const bool parsed = input.is_open() && static_cast<bool>(input >> parsed_value);
    return parsed ? parsed_value : default_value;
}
/// Try to look at cgroups limit if it is available.
///
/// Returns the number of CPU cores the current process may use according to the cgroup configuration
/// found under /sys/fs/cgroup, never more than `default_cpu_count`.
///  - cgroups v2 (detected by the presence of /sys/fs/cgroup/cgroup.controllers): the result is the minimum of
///    `default_cpu_count`, every `cpu.max` quota found between the process' cgroup and the hierarchy root, and the
///    number of CPUs listed in the nearest non-empty `cpuset.cpus.effective`.
///  - cgroups v1: the result is derived from `cpu/cpu.cfs_quota_us` and `cpu/cpu.cfs_period_us`.
/// Missing, empty or malformed files are ignored, i.e. they do not restrict the result and never raise an exception.
uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
{
    uint32_t quota_count = default_cpu_count;
    const std::filesystem::path prefix = "/sys/fs/cgroup";

    /// cgroupsv2
    std::ifstream contr_file(prefix / "cgroup.controllers");
    if (contr_file.is_open())
    {
        /// First, we identify the cgroup the process belongs
        std::ifstream cgroup_name_file("/proc/self/cgroup");
        if (!cgroup_name_file.is_open())
            return default_cpu_count;

        /// cgroup_name_file always starts with '0::/' for v2
        cgroup_name_file.ignore(4);
        std::string cgroup_name;
        cgroup_name_file >> cgroup_name;

        /// An empty name means that the process lives in the root cgroup.
        const std::filesystem::path process_cgroup = cgroup_name.empty() ? prefix : prefix / cgroup_name;

        /// The walks below go from the process' cgroup up to (and including) `prefix`.
        /// The second condition stops the walk at a filesystem root (or an empty path), where parent_path()
        /// no longer changes the path; without it an unexpected cgroup name could make the walk endless.
        const std::filesystem::path stop_dir = prefix.parent_path();
        auto should_visit = [&stop_dir](const std::filesystem::path & dir)
        {
            return dir != stop_dir && dir != dir.parent_path();
        };

        /// Looking for cpu.max in directories from the current cgroup to the top level
        /// It does not stop on the first time since the child could have a greater value than parent
        for (auto dir = process_cgroup; should_visit(dir); dir = dir.parent_path())
        {
            std::ifstream cpu_max_file(dir / "cpu.max");
            if (!cpu_max_file.is_open())
                continue;

            /// The file has the form "<quota> <period>", where <quota> may be the literal "max" (no limit).
            std::string cpu_limit_str;
            float cpu_period = 0;
            if (!(cpu_max_file >> cpu_limit_str >> cpu_period))
                continue;
            if (cpu_limit_str == "max" || !(cpu_period > 0))
                continue;

            try
            {
                const float cpu_limit = std::stof(cpu_limit_str);
                if (cpu_limit >= 0)
                {
                    /// Compare before converting, so that only values which fit into uint32_t are ever cast.
                    const float cores = std::ceil(cpu_limit / cpu_period);
                    if (cores < static_cast<float>(quota_count))
                        quota_count = static_cast<uint32_t>(cores);
                }
            }
            catch (...)
            {
                /// Malformed quota (std::stof failed): this file does not restrict the result.
            }
        }

        /// Looking for cpuset.cpus.effective in directories from the current cgroup to the top level
        for (auto dir = process_cgroup; should_visit(dir); dir = dir.parent_path())
        {
            std::ifstream cpuset_cpus_file(dir / "cpuset.cpus.effective");
            if (!cpuset_cpus_file.is_open())
                continue;

            /// The line in the file is "0,2-4,6,9-14" cpu numbers
            /// It's always grouped and ordered
            std::string cpuset_line;
            cpuset_cpus_file >> cpuset_line;
            if (cpuset_line.empty())
                continue;

            std::vector<std::string> cpu_ranges;
            boost::split(cpu_ranges, cpuset_line, boost::is_any_of(","));

            uint32_t cpus_count = 0;
            bool well_formed = true;
            try
            {
                for (const std::string & cpu_number_or_range : cpu_ranges)
                {
                    std::vector<std::string> cpu_range;
                    boost::split(cpu_range, cpu_number_or_range, boost::is_any_of("-"));
                    if (cpu_range.size() != 2)
                    {
                        /// A single CPU number.
                        ++cpus_count;
                        continue;
                    }

                    const int start = std::stoi(cpu_range[0]);
                    const int end = std::stoi(cpu_range[1]);
                    if (end < start)
                    {
                        /// A reversed range would wrap the unsigned counter around.
                        well_formed = false;
                        break;
                    }
                    cpus_count += static_cast<uint32_t>(end - start) + 1;
                }
            }
            catch (...)
            {
                /// Non-numeric range bound (std::stoi failed).
                well_formed = false;
            }

            /// A malformed file is handled like an empty one: keep looking in the parent cgroup.
            if (!well_formed)
                continue;

            quota_count = std::min(cpus_count, quota_count);
            break;
        }

        return quota_count;
    }

    /// cgroupsv1
    /// cpu.cfs_quota_us is the run time (in microseconds) the cgroup may consume during one period of
    /// cpu.cfs_period_us microseconds; -1 means no quota.
    const int cgroup_quota = readFrom(prefix / "cpu/cpu.cfs_quota_us", -1);
    const int cgroup_period = readFrom(prefix / "cpu/cpu.cfs_period_us", -1);
    if (cgroup_quota > -1 && cgroup_period > 0)
        quota_count = static_cast<uint32_t>(std::ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period)));

    return std::min(default_cpu_count, quota_count);
}
#endif
/// Returns the number of physical cores, unlike std::thread::hardware_concurrency() which returns the logical core count. With 2-way SMT
/// (HyperThreading) enabled, physical_concurrency() returns half of std::thread::hardware_concurrency(); otherwise it returns the same value.
#if defined(__x86_64__) && defined(OS_LINUX)
unsigned physical_concurrency()
{
    try
    {
        /// CPUID is not reliable across vendors and CPU models, so the physical core count is obtained by
        /// parsing /proc/cpuinfo, the same approach as boost::thread::physical_concurrency().
        ///
        /// See https://doc.callmematthi.eu/static/webArticles/Understanding%20Linux%20_proc_cpuinfo.pdf
        std::ifstream cpuinfo("/proc/cpuinfo");

        /// In obscure cases (chroot) /proc can be unmounted
        if (!cpuinfo.is_open())
            return std::thread::hardware_concurrency();

        /// Distinct (physical id, core id) pairs seen so far; each pair identifies one physical core.
        std::set<std::pair<size_t, size_t>> seen_cores;
        std::pair<size_t, size_t> current_core;

        for (std::string row; std::getline(cpuinfo, row);)
        {
            /// Only "key : value" rows are of interest.
            const size_t colon = row.find(':');
            if (colon == std::string::npos)
                continue;

            const std::string name = row.substr(0, colon);
            const std::string value = row.substr(colon + 1);

            if (name.find("physical id") != std::string::npos)
            {
                current_core.first = std::stoi(value);
            }
            else if (name.find("core id") != std::string::npos)
            {
                /// The pair is recorded when its "core id" row is read.
                current_core.second = std::stoi(value);
                seen_cores.insert(current_core);
            }
        }

        /// No usable entries means an unexpected file format.
        if (seen_cores.empty())
            return std::thread::hardware_concurrency();

        return static_cast<unsigned>(seen_cores.size());
    }
    catch (...)
    {
        /// Parsing error
        return std::thread::hardware_concurrency();
    }
}
#endif
/// Computes the number of CPU cores to use: starts from the logical core count, switches to the physical
/// core count on big x86_64 Linux machines and finally applies the cgroup limits on Linux.
unsigned getNumberOfPhysicalCPUCoresImpl()
{
    /// Logical cores (with SMT/HyperThreading).
    unsigned cores = std::thread::hardware_concurrency();

    /// Most x86_64 CPUs have 2-way SMT (Hyper-Threading).
    /// Aarch64 and RISC-V don't have SMT so far.
    /// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them.
#if defined(__x86_64__) && defined(OS_LINUX)
    /// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench), so there we limit
    /// ourselves to the physical cores. Few cores indicate a small machine, a VM or a limited cloud instance,
    /// where it is reasonable to use all the cores.
    constexpr unsigned big_machine_threshold = 32;
    if (cores >= big_machine_threshold)
        cores = physical_concurrency();
#endif

#if defined(OS_LINUX)
    return getCGroupLimitedCPUCores(cores);
#else
    return cores;
#endif
}
}
/// Returns the number of CPU cores to use, as computed by getNumberOfPhysicalCPUCoresImpl().
unsigned getNumberOfPhysicalCPUCores()
{
    /// The value is computed on the first call only; subsequent calls return the stored result.
    static const unsigned cached_core_count = getNumberOfPhysicalCPUCoresImpl();
    return cached_core_count;
}
|