1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
|
#include <sys/types.h>
#include <sys/wait.h>
#include <dlfcn.h>
#include <unistd.h>
#include <csignal>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Common/Exception.h>
#include <Common/ShellCommand.h>
#include <Common/PipeFDs.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Common/waitForPid.h>
namespace
{
/// By these return codes from the child process, we learn (for sure) about errors when creating it.
enum class ReturnCodes : int
{
CANNOT_DUP_STDIN = 0x55555555, /// The value is not important, but it is chosen so that it's rare to conflict with the program return code.
CANNOT_DUP_STDOUT = 0x55555556,
CANNOT_DUP_STDERR = 0x55555557,
CANNOT_EXEC = 0x55555558,
CANNOT_DUP_READ_DESCRIPTOR = 0x55555559,
CANNOT_DUP_WRITE_DESCRIPTOR = 0x55555560,
};
}
namespace ProfileEvents
{
extern const Event ExecuteShellCommand;
}
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_DLSYM;
extern const int CANNOT_FORK;
extern const int CANNOT_WAITPID;
extern const int CHILD_WAS_NOT_EXITED_NORMALLY;
extern const int CANNOT_CREATE_CHILD_PROCESS;
}
ShellCommand::ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, const ShellCommand::Config & config_)
: in(in_fd_)
, out(out_fd_)
, err(err_fd_)
, pid(pid_)
, config(config_)
{
}
Poco::Logger * ShellCommand::getLogger()
{
return &Poco::Logger::get("ShellCommand");
}
ShellCommand::~ShellCommand()
{
if (wait_called)
return;
if (config.terminate_in_destructor_strategy.terminate_in_destructor)
{
size_t try_wait_timeout = config.terminate_in_destructor_strategy.wait_for_normal_exit_before_termination_seconds;
bool process_terminated_normally = tryWaitProcessWithTimeout(try_wait_timeout);
if (process_terminated_normally)
return;
LOG_TRACE(getLogger(), "Will kill shell command pid {} with signal {}", pid, config.terminate_in_destructor_strategy.termination_signal);
int retcode = kill(pid, config.terminate_in_destructor_strategy.termination_signal);
if (retcode != 0)
LOG_WARNING(getLogger(), "Cannot kill shell command pid {}, error: '{}'", pid, errnoToString());
}
else
{
try
{
tryWait();
}
catch (...)
{
tryLogCurrentException(getLogger());
}
}
}
bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds)
{
LOG_TRACE(getLogger(), "Try wait for shell command pid {} with timeout {}", pid, timeout_in_seconds);
wait_called = true;
in.close();
out.close();
err.close();
for (auto & [_, fd] : write_fds)
fd.close();
for (auto & [_, fd] : read_fds)
fd.close();
return waitForPid(pid, timeout_in_seconds);
}
void ShellCommand::logCommand(const char * filename, char * const argv[])
{
WriteBufferFromOwnString args;
for (int i = 0; argv != nullptr && argv[i] != nullptr; ++i)
{
if (i > 0)
args << ", ";
/// NOTE: No escaping is performed.
args << "'" << argv[i] << "'";
}
LOG_TRACE(ShellCommand::getLogger(), "Will start shell command '{}' with arguments {}", filename, args.str());
}
std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
const char * filename,
char * const argv[],
const Config & config)
{
logCommand(filename, argv);
ProfileEvents::increment(ProfileEvents::ExecuteShellCommand);
#if !defined(USE_MUSL)
/** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs,
* because of the resolving of symbols in the shared library
* http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html
* Therefore, separate the resolving of the symbol from the call.
*/
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
#else
/// If we use Musl with static linking, there is no dlsym and no issue with vfork.
static void * real_vfork = reinterpret_cast<void *>(&vfork);
#endif
if (!real_vfork)
throwFromErrno("Cannot find symbol vfork in myself", ErrorCodes::CANNOT_DLSYM);
PipeFDs pipe_stdin;
PipeFDs pipe_stdout;
PipeFDs pipe_stderr;
std::vector<std::unique_ptr<PipeFDs>> read_pipe_fds;
std::vector<std::unique_ptr<PipeFDs>> write_pipe_fds;
for (size_t i = 0; i < config.read_fds.size(); ++i)
read_pipe_fds.emplace_back(std::make_unique<PipeFDs>());
for (size_t i = 0; i < config.write_fds.size(); ++i)
write_pipe_fds.emplace_back(std::make_unique<PipeFDs>());
pid_t pid = reinterpret_cast<pid_t(*)()>(real_vfork)();
if (pid == -1)
throwFromErrno("Cannot vfork", ErrorCodes::CANNOT_FORK);
if (0 == pid)
{
/// We are in the freshly created process.
/// Why `_exit` and not `exit`? Because `exit` calls `atexit` and destructors of thread local storage.
/// And there is a lot of garbage (including, for example, mutex is blocked). And this can not be done after `vfork` - deadlock happens.
/// Replace the file descriptors with the ends of our pipes.
if (STDIN_FILENO != dup2(pipe_stdin.fds_rw[0], STDIN_FILENO))
_exit(static_cast<int>(ReturnCodes::CANNOT_DUP_STDIN));
if (!config.pipe_stdin_only)
{
if (STDOUT_FILENO != dup2(pipe_stdout.fds_rw[1], STDOUT_FILENO))
_exit(static_cast<int>(ReturnCodes::CANNOT_DUP_STDOUT));
if (STDERR_FILENO != dup2(pipe_stderr.fds_rw[1], STDERR_FILENO))
_exit(static_cast<int>(ReturnCodes::CANNOT_DUP_STDERR));
}
for (size_t i = 0; i < config.read_fds.size(); ++i)
{
auto & fds = *read_pipe_fds[i];
auto fd = config.read_fds[i];
if (fd != dup2(fds.fds_rw[1], fd))
_exit(static_cast<int>(ReturnCodes::CANNOT_DUP_READ_DESCRIPTOR));
}
for (size_t i = 0; i < config.write_fds.size(); ++i)
{
auto & fds = *write_pipe_fds[i];
auto fd = config.write_fds[i];
if (fd != dup2(fds.fds_rw[0], fd))
_exit(static_cast<int>(ReturnCodes::CANNOT_DUP_WRITE_DESCRIPTOR));
}
// Reset the signal mask: it may be non-empty and will be inherited
// by the child process, which might not expect this.
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe)
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe)
execv(filename, argv);
/// If the process is running, then `execv` does not return here.
_exit(static_cast<int>(ReturnCodes::CANNOT_EXEC));
}
std::unique_ptr<ShellCommand> res(new ShellCommand(
pid,
pipe_stdin.fds_rw[1],
pipe_stdout.fds_rw[0],
pipe_stderr.fds_rw[0],
config));
for (size_t i = 0; i < config.read_fds.size(); ++i)
{
auto & fds = *read_pipe_fds[i];
auto fd = config.read_fds[i];
res->read_fds.emplace(fd, fds.fds_rw[0]);
}
for (size_t i = 0; i < config.write_fds.size(); ++i)
{
auto & fds = *write_pipe_fds[i];
auto fd = config.write_fds[i];
res->write_fds.emplace(fd, fds.fds_rw[1]);
}
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
return res;
}
std::unique_ptr<ShellCommand> ShellCommand::execute(const ShellCommand::Config & config)
{
auto config_copy = config;
config_copy.command = "/bin/sh";
config_copy.arguments = {"-c", config.command};
for (const auto & argument : config.arguments)
config_copy.arguments.emplace_back(argument);
return executeDirect(config_copy);
}
std::unique_ptr<ShellCommand> ShellCommand::executeDirect(const ShellCommand::Config & config)
{
const auto & path = config.command;
const auto & arguments = config.arguments;
size_t argv_sum_size = path.size() + 1;
for (const auto & arg : arguments)
argv_sum_size += arg.size() + 1;
std::vector<char *> argv(arguments.size() + 2);
std::vector<char> argv_data(argv_sum_size);
WriteBufferFromPointer writer(argv_data.data(), argv_sum_size);
argv[0] = writer.position();
writer.write(path.data(), path.size() + 1);
for (size_t i = 0, size = arguments.size(); i < size; ++i)
{
argv[i + 1] = writer.position();
writer.write(arguments[i].data(), arguments[i].size() + 1);
}
writer.finalize();
argv[arguments.size() + 1] = nullptr;
return executeImpl(path.data(), argv.data(), config);
}
int ShellCommand::tryWait()
{
wait_called = true;
in.close();
out.close();
err.close();
for (auto & [_, fd] : write_fds)
fd.close();
for (auto & [_, fd] : read_fds)
fd.close();
LOG_TRACE(getLogger(), "Will wait for shell command pid {}", pid);
int status = 0;
while (waitpid(pid, &status, 0) < 0)
{
if (errno != EINTR)
throwFromErrno("Cannot waitpid", ErrorCodes::CANNOT_WAITPID);
}
LOG_TRACE(getLogger(), "Wait for shell command pid {} completed with status {}", pid, status);
if (WIFEXITED(status))
return WEXITSTATUS(status);
if (WIFSIGNALED(status))
throw Exception(ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY, "Child process was terminated by signal {}", toString(WTERMSIG(status)));
if (WIFSTOPPED(status))
throw Exception(ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY, "Child process was stopped by signal {}", toString(WSTOPSIG(status)));
throw Exception(ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY, "Child process was not exited normally by unknown reason");
}
void ShellCommand::wait()
{
int retcode = tryWait();
if (retcode != EXIT_SUCCESS)
{
switch (retcode)
{
case static_cast<int>(ReturnCodes::CANNOT_DUP_STDIN):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 stdin of child process");
case static_cast<int>(ReturnCodes::CANNOT_DUP_STDOUT):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 stdout of child process");
case static_cast<int>(ReturnCodes::CANNOT_DUP_STDERR):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 stderr of child process");
case static_cast<int>(ReturnCodes::CANNOT_EXEC):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot execv in child process");
case static_cast<int>(ReturnCodes::CANNOT_DUP_READ_DESCRIPTOR):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 read descriptor of child process");
case static_cast<int>(ReturnCodes::CANNOT_DUP_WRITE_DESCRIPTOR):
throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 write descriptor of child process");
default:
throw Exception(ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY, "Child process was exited with return code {}", toString(retcode));
}
}
}
}
|