aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/threading/poor_man_openmp/thread_helper.h
blob: 1536c186cb7356ea698c4d0107c8258b6a8fb83a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#pragma once

#include <util/thread/pool.h>
#include <util/generic/utility.h>
#include <util/generic/yexception.h> 
#include <util/system/info.h>
#include <util/system/atomic.h>
#include <util/system/condvar.h>
#include <util/system/mutex.h>
#include <util/stream/output.h> 

#include <functional>
#include <cstdlib> 

class TMtpQueueHelper { 
public:
    TMtpQueueHelper() {
        SetThreadCount(NSystemInfo::CachedNumberOfCpus());
    }
    IThreadPool* Get() {
        return q.Get();
    }
    size_t GetThreadCount() {
        return ThreadCount;
    }
    void SetThreadCount(size_t threads) {
        ThreadCount = threads;
        q = CreateThreadPool(ThreadCount);
    }
 
    static TMtpQueueHelper& Instance(); 
 
private:
    size_t ThreadCount;
    TAutoPtr<IThreadPool> q;
};

namespace NYmp { 
    inline void SetThreadCount(size_t threads) {
        TMtpQueueHelper::Instance().SetThreadCount(threads); 
    }

    inline size_t GetThreadCount() {
        return TMtpQueueHelper::Instance().GetThreadCount(); 
    }

    template <typename T> 
    inline void ParallelForStaticChunk(T begin, T end, size_t chunkSize, std::function<void(T)> func) {
        chunkSize = Max<size_t>(chunkSize, 1); 
 
        size_t threadCount = TMtpQueueHelper::Instance().GetThreadCount(); 
        IThreadPool* queue = TMtpQueueHelper::Instance().Get();
        TCondVar cv;
        TMutex mutex;
        TAtomic counter = threadCount;
        std::exception_ptr err; 
 
        for (size_t i = 0; i < threadCount; ++i) { 
            queue->SafeAddFunc([&cv, &counter, &mutex, &func, i, begin, end, chunkSize, threadCount, &err]() { 
                try { 
                    T currentChunkStart = begin + static_cast<decltype(T() - T())>(i * chunkSize); 
 
                    while (currentChunkStart < end) { 
                        T currentChunkEnd = Min<T>(end, currentChunkStart + chunkSize); 
 
                        for (T val = currentChunkStart; val < currentChunkEnd; ++val) { 
                            func(val); 
                        } 
 
                        currentChunkStart += chunkSize * threadCount; 
                    }
                } catch (...) { 
                    with_lock (mutex) { 
                        err = std::current_exception(); 
                    } 
                }
 
                with_lock (mutex) { 
                    if (AtomicDecrement(counter) == 0) { 
                        //last one 
                        cv.Signal(); 
                    } 
                } 
            });
        }
 
        with_lock (mutex) { 
            while (AtomicGet(counter) > 0) { 
                cv.WaitI(mutex); 
            } 
        }
 
        if (err) { 
            std::rethrow_exception(err); 
        } 
    }

    template <typename T> 
    inline void ParallelForStaticAutoChunk(T begin, T end, std::function<void(T)> func) {
        const size_t taskSize = end - begin; 
        const size_t threadCount = TMtpQueueHelper::Instance().GetThreadCount(); 
 
        ParallelForStaticChunk(begin, end, (taskSize + threadCount - 1) / threadCount, func);
    }
}