1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
#include "remapExecutable.h"
#if defined(OS_LINUX) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG)
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
#include <sys/syscall.h>
#include <emmintrin.h>
#include <Common/getMappedArea.h>
#include <Common/Exception.h>
#include <fmt/format.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
}
namespace
{
/// NOLINTNEXTLINE(cert-dcl50-cpp)
__attribute__((__noinline__)) int64_t our_syscall(...)
{
__asm__ __volatile__ (R"(
movq %%rdi,%%rax;
movq %%rsi,%%rdi;
movq %%rdx,%%rsi;
movq %%rcx,%%rdx;
movq %%r8,%%r10;
movq %%r9,%%r8;
movq 8(%%rsp),%%r9;
syscall;
ret
)" : : : "memory");
return 0;
}
__attribute__((__noinline__)) void remapToHugeStep3(void * scratch, size_t size, size_t offset)
{
/// The function should not use the stack, otherwise various optimizations, including "omit-frame-pointer" may break the code.
/// Unmap the scratch area.
our_syscall(SYS_munmap, scratch, size);
/** The return address of this function is pointing to scratch area (because it was called from there).
* But the scratch area no longer exists. We should correct the return address by subtracting the offset.
*/
__asm__ __volatile__("subq %0, 8(%%rsp)" : : "r"(offset) : "memory");
}
__attribute__((__noinline__)) void remapToHugeStep2(void * begin, size_t size, void * scratch)
{
/** Unmap old memory region with the code of our program.
* Our instruction pointer is located inside scratch area and this function can execute after old code is unmapped.
* But it cannot call any other functions because they are not available at usual addresses
* - that's why we have to use "our_syscall" function and a substitution for memcpy.
* (Relative addressing may continue to work but we should not assume that).
*/
int64_t offset = reinterpret_cast<intptr_t>(scratch) - reinterpret_cast<intptr_t>(begin);
int64_t (*syscall_func)(...) = reinterpret_cast<int64_t (*)(...)>(reinterpret_cast<intptr_t>(our_syscall) + offset);
int64_t munmap_res = syscall_func(SYS_munmap, begin, size);
if (munmap_res != 0)
return;
/// Map new anonymous memory region in place of old region with code.
int64_t mmap_res = syscall_func(SYS_mmap, begin, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
if (-1 == mmap_res)
syscall_func(SYS_exit, 1);
/// As the memory region is anonymous, we can do madvise with MADV_HUGEPAGE.
syscall_func(SYS_madvise, begin, size, MADV_HUGEPAGE);
/// Copy the code from scratch area to the old memory location.
{
__m128i * __restrict dst = reinterpret_cast<__m128i *>(begin);
const __m128i * __restrict src = reinterpret_cast<const __m128i *>(scratch);
const __m128i * __restrict src_end = reinterpret_cast<const __m128i *>(reinterpret_cast<const char *>(scratch) + size);
while (src < src_end)
{
_mm_storeu_si128(dst, _mm_loadu_si128(src));
++dst;
++src;
}
}
/// Make the memory area with the code executable and non-writable.
syscall_func(SYS_mprotect, begin, size, PROT_READ | PROT_EXEC);
/** Step 3 function should unmap the scratch area.
* The currently executed code is located in the scratch area and cannot be removed here.
* We have to call another function and use its address from the original location (not in scratch area).
* To do it, we obtain its pointer and call by pointer.
*/
void(* volatile step3)(void*, size_t, size_t) = remapToHugeStep3;
step3(scratch, size, offset);
}
__attribute__((__noinline__)) void remapToHugeStep1(void * begin, size_t size)
{
/// Allocate scratch area and copy the code there.
void * scratch = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == scratch)
throwFromErrno(fmt::format("Cannot mmap {} bytes", size), ErrorCodes::CANNOT_ALLOCATE_MEMORY);
memcpy(scratch, begin, size);
/// Offset to the scratch area from previous location.
int64_t offset = reinterpret_cast<intptr_t>(scratch) - reinterpret_cast<intptr_t>(begin);
/// Jump to the next function inside the scratch area.
reinterpret_cast<void(*)(void*, size_t, void*)>(reinterpret_cast<intptr_t>(remapToHugeStep2) + offset)(begin, size, scratch);
}
}
size_t remapExecutable()
{
auto [begin, size] = getMappedArea(reinterpret_cast<void *>(remapExecutable));
remapToHugeStep1(begin, size);
return size;
}
}
#else
namespace DB
{
size_t remapExecutable() { return 0; }
}
#endif
|