aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Common/ThreadFuzzer.h
blob: 9dd55fe7995c9ed2e3d1a299bc6338664a7c48eb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#pragma once
#include <cstdint>
#include <atomic>

namespace DB
{

/** Allows randomizing thread scheduling and inserting various glitches across the whole program for testing purposes.
  * It is done by setting up a timer that will send the PROF signal to every thread when a certain amount of CPU time has passed.
  *
  * To initialize ThreadFuzzer, call ThreadFuzzer::instance().
  * The behaviour is controlled by environment variables:
  *
  * THREAD_FUZZER_CPU_TIME_PERIOD_US  - period of signals in microseconds.
  * THREAD_FUZZER_YIELD_PROBABILITY   - probability to do 'sched_yield'.
  * THREAD_FUZZER_MIGRATE_PROBABILITY - probability to set CPU affinity to random CPU core.
  * THREAD_FUZZER_SLEEP_PROBABILITY   - probability to sleep.
  * THREAD_FUZZER_SLEEP_TIME_US       - amount of time to sleep in microseconds.
  *
  * ThreadFuzzer will do nothing if environment variables are not set accordingly.
  *
  * The intention is to reproduce thread synchronization bugs (race conditions and deadlocks) more frequently in tests.
  * We already have tests with TSan. But TSan only covers "physical" synchronization bugs, but not "logical" ones,
  *  where all data is protected by synchronization primitives, but we still have race conditions.
  * Obviously, TSan cannot debug distributed synchronization bugs.
  *
  * The motivation for this tool is the evidence that concurrency bugs are more likely to reproduce
  *  on bad, unstable virtual machines in dirty environments.
  *
  * The idea is not new, see also:
  * https://channel9.msdn.com/blogs/peli/concurrency-fuzzing-with-cuzz
  *
  * Notes:
  * - it can be also implemented with instrumentation (example: LLVM Xray) instead of signals.
  * - we should also make the sleep time random.
  * - sleep and migration obviously help, but the effect of yield is unclear.
  *
  * In addition, we allow injecting glitches around thread synchronization functions.
  * Example:
  *
  * THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
  * THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
  * THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
  * THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
  */
class ThreadFuzzer
{
public:
    /// Returns the process-wide instance. It is a function-local static, so it is
    /// constructed on the first call and the same object is returned afterwards.
    static ThreadFuzzer & instance()
    {
        static ThreadFuzzer res;
        return res;
    }

    /// The only meaningful object of this class is the one owned by instance().
    /// Forbid copying so that a forgotten '&' (`auto f = ThreadFuzzer::instance();`)
    /// is a compile error instead of a silently detached copy of the configuration.
    ThreadFuzzer(const ThreadFuzzer &) = delete;
    ThreadFuzzer & operator=(const ThreadFuzzer &) = delete;

    /// NOTE(review): defined out of line; presumably tells whether the configuration
    /// read from the environment enables any fuzzing at all - confirm in the definition.
    bool isEffective() const;

    /// Control and query the global `started` flag (see below).
    /// NOTE(review): defined out of line; exact semantics should be confirmed there.
    static void stop();
    static void start();
    static bool isStarted();

    /// Explicit injection points to be called from regular code.
    /// NOTE(review): presumably governed by explicit_sleep_probability and
    /// explicit_memory_exception_probability respectively - confirm in the definitions.
    static void maybeInjectSleep();
    static void maybeInjectMemoryLimitException();

private:
    /// Settings for the signal-driven fuzzing. The names mirror the
    /// THREAD_FUZZER_* environment variables; zero is the default for every one of them.
    uint64_t cpu_time_period_us = 0;
    double yield_probability = 0;
    double migrate_probability = 0;
    double sleep_probability = 0;
    double sleep_time_us = 0;

    /// Probabilities for the explicit injection points; zero by default.
    double explicit_sleep_probability = 0;
    double explicit_memory_exception_probability = 0;

    /// Global on/off flag shared by all threads; initially true.
    inline static std::atomic<bool> started{true};

    /// Private: the object can only be created through instance().
    ThreadFuzzer();

    /// NOTE(review): defined out of line; presumably initConfiguration() fills the fields
    /// above and setup() installs the timer and the signal handler - confirm in the definitions.
    void initConfiguration();
    void setup() const;

    /// Handler with the standard signal-handler signature; the argument is unnamed here.
    static void signalHandler(int);
};

}