1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
#pragma once
#include <util/thread/pool.h>
#include <util/generic/utility.h>
#include <util/generic/yexception.h>
#include <util/system/info.h>
#include <util/system/atomic.h>
#include <util/system/condvar.h>
#include <util/system/mutex.h>
#include <util/stream/output.h>
#include <functional>
#include <cstdlib>
class TMtpQueueHelper {
public:
TMtpQueueHelper() {
SetThreadCount(NSystemInfo::CachedNumberOfCpus());
}
IThreadPool* Get() {
return q.Get();
}
size_t GetThreadCount() {
return ThreadCount;
}
void SetThreadCount(size_t threads) {
ThreadCount = threads;
q = CreateThreadPool(ThreadCount);
}
static TMtpQueueHelper& Instance();
private:
size_t ThreadCount;
TAutoPtr<IThreadPool> q;
};
namespace NYmp {
inline void SetThreadCount(size_t threads) {
TMtpQueueHelper::Instance().SetThreadCount(threads);
}
inline size_t GetThreadCount() {
return TMtpQueueHelper::Instance().GetThreadCount();
}
/**
 * Calls func(val) for every val in [begin, end), splitting the range into
 * chunks of `chunkSize` consecutive values that are assigned to tasks in a
 * fixed round-robin order.
 *
 * One task is queued on the shared pool per configured thread; task i handles
 * chunks i, i + threadCount, i + 2 * threadCount, ... The call returns only
 * after every queued task has finished.
 *
 * @param begin      first value to process
 * @param end        one past the last value; nothing happens unless begin < end
 * @param chunkSize  number of consecutive values per chunk; 0 is treated as 1
 * @param func       callback invoked once per value from the queued tasks
 *
 * If func throws inside a task, that task stops processing its remaining
 * chunks; the other tasks continue. After all tasks have finished, the first
 * recorded exception is rethrown to the caller. If queuing a task fails, the
 * tasks that were already queued are awaited and the queuing error is rethrown.
 */
template <typename T>
inline void ParallelForStaticChunk(T begin, T end, size_t chunkSize, std::function<void(T)> func) {
    // Empty (or inverted) range: no task would execute anything, so skip the pool entirely.
    if (!(begin < end)) {
        return;
    }
    chunkSize = Max<size_t>(chunkSize, 1);
    const size_t threadCount = TMtpQueueHelper::Instance().GetThreadCount();
    if (threadCount == 0) {
        // With a zero thread count no task would be queued and the whole range
        // would be silently skipped; process it on the calling thread instead.
        for (T val = begin; val < end; ++val) {
            func(val);
        }
        return;
    }
    IThreadPool* queue = TMtpQueueHelper::Instance().Get();
    TCondVar cv;
    TMutex mutex;
    size_t pending = threadCount;   // tasks not yet finished; guarded by mutex
    std::exception_ptr err;         // first exception thrown inside a task; guarded by mutex
    std::exception_ptr scheduleErr; // exception thrown while queuing tasks
    size_t scheduled = 0;           // number of tasks successfully queued
    try {
        for (; scheduled < threadCount; ++scheduled) {
            const size_t i = scheduled;
            queue->SafeAddFunc([&cv, &pending, &mutex, &func, i, begin, end, chunkSize, threadCount, &err]() {
                try {
                    // Task i starts at chunk i and then jumps threadCount chunks at a time.
                    T currentChunkStart = begin + static_cast<decltype(T() - T())>(i * chunkSize);
                    while (currentChunkStart < end) {
                        T currentChunkEnd = Min<T>(end, currentChunkStart + chunkSize);
                        for (T val = currentChunkStart; val < currentChunkEnd; ++val) {
                            func(val);
                        }
                        currentChunkStart += chunkSize * threadCount;
                    }
                } catch (...) {
                    with_lock (mutex) {
                        if (!err) {
                            err = std::current_exception();
                        }
                    }
                }
                with_lock (mutex) {
                    if (--pending == 0) {
                        // Last task to finish wakes the waiting caller.
                        cv.Signal();
                    }
                }
            });
        }
    } catch (...) {
        // NOTE(review): assumes SafeAddFunc reports failure by throwing and that
        // a task whose submission threw was not enqueued - confirm against
        // util/thread/pool.h. The already queued tasks reference locals of this
        // frame, so they must be awaited below before the error may propagate.
        scheduleErr = std::current_exception();
    }
    with_lock (mutex) {
        // Tasks that were never queued will never decrement the counter themselves.
        pending -= threadCount - scheduled;
        while (pending > 0) {
            cv.WaitI(mutex);
        }
    }
    if (scheduleErr) {
        std::rethrow_exception(scheduleErr);
    }
    if (err) {
        std::rethrow_exception(err);
    }
}
template <typename T>
inline void ParallelForStaticAutoChunk(T begin, T end, std::function<void(T)> func) {
const size_t taskSize = end - begin;
const size_t threadCount = TMtpQueueHelper::Instance().GetThreadCount();
ParallelForStaticChunk(begin, end, (taskSize + threadCount - 1) / threadCount, func);
}
}
|