// path: root/library/cpp/malloc/nalf/nalf_alloc.h
// blob: a3d8126df2ba22d77f40e1867ae9d017a084e2e0
#pragma once

#include <util/generic/vector.h>
#include <util/stream/output.h>

// Default value of the `hint` parameter of Allocate().
// Define NALF_ALLOC_DEFAULTMODE before including this header to override it.
// The expansion names TAllocHint unqualified, so it only resolves where
// NNumaAwareLockFreeAllocator::TAllocHint is visible by that name.
#ifndef NALF_ALLOC_DEFAULTMODE
#define NALF_ALLOC_DEFAULTMODE (TAllocHint::Chunked)
#endif

// Default value of the `align` parameter of Allocate()
// (presumably a byte count - TODO confirm against the implementation).
// Define NALF_ALLOC_DEFAULTALIGN before including this header to override it.
#ifndef NALF_ALLOC_DEFAULTALIGN
#define NALF_ALLOC_DEFAULTALIGN (16)
#endif

// When any of the tsan/msan/asan/valgrind build macros is defined, turn on two
// switches. Nothing in this header consumes them.
// NOTE(review): judging by the names, they make the allocator fall back to plain
// malloc/free and suppress the definition of global overrides - confirm in the
// implementation files.
#if defined(_tsan_enabled_) || defined(_msan_enabled_) || defined(_asan_enabled_) || defined(WITH_VALGRIND)
#define NALF_FORCE_MALLOC_FREE 1
#define NALF_DONOT_DEFINE_GLOBALS 1
#endif

namespace NNumaAwareLockFreeAllocator {
    // Scope holder for the allocation hint enumeration; values are spelled
    // TAllocHint::Chunked, TAllocHint::System, and so on.
    struct TAllocHint {
        // Unscoped enum: enumerators get consecutive values starting at 0 in
        // declaration order, so inserting or reordering entries renumbers them.
        enum EHint {
            Undefined,
            Incremental,
            Chunked,
            System,
            ForceIncremental,
            ForceChunked,
            ForceSystem,
            Bootstrap,
        };
        // Valid "op" hint values: Incremental, Chunked, System and the Force* variants
        // (presumably "op" means the `hint` argument of a single Allocate call - confirm).
        // Valid "thread" hint values: Undefined, Incremental, Chunked, System
        // (presumably the per-thread value exchanged through SwapHint - confirm).
        // Bootstrap is used in node initialization only.
    };

    // Opaque allocator state type; only forward-declared in this header.
    class TPerThreadAllocator;

    // Returns a TPerThreadAllocator pointer
    // (presumably the calling thread's instance - confirm in the implementation).
    TPerThreadAllocator* GetThreadAllocator();
    // Requests a block of size `len` with the given hint and alignment.
    // `hint` defaults to NALF_ALLOC_DEFAULTMODE and `align` to NALF_ALLOC_DEFAULTALIGN.
    // NOTE(review): units of `len`/`align` (presumably bytes) and the failure
    // behaviour are not visible here - confirm.
    void* Allocate(ui64 len, TAllocHint::EHint hint = NALF_ALLOC_DEFAULTMODE, ui64 align = NALF_ALLOC_DEFAULTALIGN);
    // Releases the block `mem`
    // (presumably one obtained from Allocate/Realloc - confirm).
    void Free(void* mem);
    // Resizes the block `mem` to `len` and returns a block pointer.
    // NOTE(review): presumably realloc-like (pointer may change, contents kept) - confirm.
    void* Realloc(void* mem, ui64 len);
    // Takes a hint and returns a hint; declared noexcept. TSwapHint stores the
    // returned value and passes it back to SwapHint on destruction, i.e. the
    // return value is treated as the previously active hint.
    TAllocHint::EHint SwapHint(TAllocHint::EHint hint) noexcept;
    // Returns a (ui64, EHint) pair for `mem`
    // (presumably the block's size and the hint it was allocated under - confirm).
    std::pair<ui64, TAllocHint::EHint> MemBlockSize(void* mem);

    // Overloads of Allocate/Free/Realloc/SwapHint that take the allocator pointer
    // explicitly as the first argument; the remaining parameters and defaults
    // match the overloads without `pta`. GetThreadAllocator() returns a pointer of
    // this type. There is no `pta` overload of MemBlockSize.
    // NOTE(review): presumably these let a caller that already holds the pointer
    // avoid a per-call lookup - confirm in the implementation.
    void* Allocate(TPerThreadAllocator* pta, ui64 len, TAllocHint::EHint hint = NALF_ALLOC_DEFAULTMODE, ui64 align = NALF_ALLOC_DEFAULTALIGN);
    void Free(TPerThreadAllocator* pta, void* mem);
    void* Realloc(TPerThreadAllocator* pta, void* mem, ui64 len);

    // Explicit-allocator form of SwapHint; takes a hint, returns a hint, noexcept.
    TAllocHint::EHint SwapHint(TPerThreadAllocator* pta, TAllocHint::EHint hint) noexcept;

    // Scoped hint override. Construction calls SwapHint(Hint) and remembers the
    // value it returns; destruction hands that value back to SwapHint. Both calls
    // use the SwapHint overload that takes no allocator pointer.
    // Copying is disabled through the TNonCopyable base.
    //
    // Usage sketch:
    //     {
    //         TSwapHint<TAllocHint::System> guard;
    //         ... code running with the swapped hint ...
    //     } // guard destroyed here, remembered value passed back to SwapHint
    template <TAllocHint::EHint Hint>
    struct TSwapHint : TNonCopyable {
        // Value returned by SwapHint() in the constructor.
        const TAllocHint::EHint Old;

        TSwapHint() : Old{SwapHint(Hint)} {}

        ~TSwapHint() { SwapHint(Old); }
    };

    // Low-level memory primitives. Only the signatures are visible here.
    // NOTE(review): presumably thin wrappers over OS mapping calls - confirm.

    // Returns a block for the requested `size`.
    void* SystemAllocation(ui64 size);
    // Releases `mem`; the caller must also supply the block's `size`.
    void SystemFree(void* mem, ui64 size);
    // Changes the block `mem` from `oldsize` to `newsize` and returns a block pointer
    // (presumably the pointer may differ from `mem` - confirm).
    void* SystemRemap(void* mem, ui64 oldsize, ui64 newsize);
    // Returns a NUMA node number as ui32
    // (presumably the node the calling thread currently runs on - confirm).
    ui32 GetNumaNode();

    // Snapshot of allocator counters. GetAllocatorStats() returns a vector of
    // these, one entry per NUMA node.
    // NOTE(review): the constructors declared below are defined elsewhere;
    // presumably they zero every counter - confirm.
    struct TAllocatorStats {
        ui64 TotalBytesReserved; // excludes system bytes (presumably those reported in SysStats - confirm)
        ui32 PerThreadEntries;

        // Counters for one element of BySizeStats. Same seven counters as
        // TIncrementalStats, plus the PageSize/ChunkSize pair that identifies
        // the entry (presumably one entry per chunk size class - confirm).
        struct TSizeStats {
            ui32 PageSize;
            ui32 ChunkSize;

            ui64 TotalPagesReserved;
            ui64 TotalPagesCached;
            ui64 TotalAllocations;
            ui64 TotalReclaimed;
            ui64 PagesClaimed;
            ui64 PagesFromCache;
            ui64 PagesReleased;

            TSizeStats();
        };

        // Page-level counters reported through IncrementalStats
        // (presumably for allocations made with the Incremental hint - confirm).
        struct TIncrementalStats {
            ui64 TotalPagesReserved;
            ui64 TotalPagesCached;
            ui64 TotalAllocations;
            ui64 TotalReclaimed;
            ui64 PagesClaimed;
            ui64 PagesFromCache;
            ui64 PagesReleased;

            TIncrementalStats();
        };

        // Byte-level counters reported through SysStats
        // (presumably for allocations made with the System hint - confirm).
        struct TSysStats {
            ui64 TotalBytesReserved;
            ui64 TotalBytesCached;
            ui64 TotalAllocations;
            ui64 TotalReclaimed;

            TSysStats();
        };

        TVector<TSizeStats> BySizeStats;
        TIncrementalStats IncrementalStats;
        TSysStats SysStats;

        TAllocatorStats();
        // Writes this object to `out`; const, so the stats are not modified.
        // NOTE(review): output format is defined in the implementation - presumably
        // a human-readable dump.
        void Out(IOutputStream& out) const;
    };

    // Returns allocator statistics by value; the vector holds one
    // TAllocatorStats entry per NUMA node.
    TVector<TAllocatorStats> GetAllocatorStats(); // one entry per numa-node

    // Page size constant used by the allocator. Hard-coded to 4096; it is not
    // queried from the operating system.
    // NOTE(review): verify this assumption on targets whose pages are larger than 4 KiB.
    static constexpr ui64 SystemPageSize = 4096;

}