diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /contrib/libs/cxxsupp/openmp/kmp_stats.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_stats.h')
-rw-r--r-- | contrib/libs/cxxsupp/openmp/kmp_stats.h | 1496 |
1 files changed, 748 insertions, 748 deletions
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats.h b/contrib/libs/cxxsupp/openmp/kmp_stats.h index 20cec3efcc..c52c964456 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stats.h +++ b/contrib/libs/cxxsupp/openmp/kmp_stats.h @@ -1,748 +1,748 @@ -#ifndef KMP_STATS_H -#define KMP_STATS_H - -/** @file kmp_stats.h - * Functions for collecting statistics. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_config.h" - -#if KMP_STATS_ENABLED -/* - * Statistics accumulator. - * Accumulates number of samples and computes min, max, mean, standard deviation on the fly. - * - * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm - */ - -#include <limits> -#include <math.h> -#include <string> -#include <stdint.h> -#include <new> // placement new -#include "kmp_stats_timing.h" - -/* - * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and - * are intended for the runtime library developer. - */ -// #define KMP_DEVELOPER_STATS 1 - -/*! - * @ingroup STATS_GATHERING - * \brief flags to describe the statistic ( timers or counter ) - * -*/ -class stats_flags_e { - public: - const static int onlyInMaster = 1<<0; //!< statistic is valid only for master - const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output - const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function - const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads - const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) -}; - -/*! - * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h - * - * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \details A counter counts the occurrence of some event. - * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread - * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement). - * The min,mean,max are therefore the values for the threads. - * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do. - * All of the tables and printing is generated from this macro. - * Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING -*/ -#define KMP_FOREACH_COUNTER(macro, arg) \ - macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_NESTED_PARALLEL, 0, arg) \ - macro (OMP_FOR_static, 0, arg) \ - macro (OMP_FOR_dynamic, 0, arg) \ - macro (OMP_DISTRIBUTE, 0, arg) \ - macro (OMP_BARRIER, 0, arg) \ - macro (OMP_CRITICAL,0, arg) \ - macro (OMP_SINGLE, 0, arg) \ - macro (OMP_MASTER, 0, arg) \ - macro (OMP_TEAMS, 0, arg) \ - macro (OMP_set_lock, 0, arg) \ - macro (OMP_test_lock, 0, arg) \ - macro (REDUCE_wait, 0, arg) \ - macro (REDUCE_nowait, 0, arg) \ - macro (OMP_TASKYIELD, 0, arg) \ - macro (TASK_executed, 0, arg) \ - macro (TASK_cancelled, 0, arg) \ - macro (TASK_stolen, 0, arg) \ - macro (LAST,0,arg) - -// OMP_PARALLEL_args -- the number of arguments passed to a fork -// FOR_static_iterations -- Number of available parallel chunks of work in a static for -// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for -// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2. - -/*! - * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h - * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads. - * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork" - * as well). - * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level. - * Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING2 - */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_work, 0, arg) \ - macro (Total_work, stats_flags_e::synthesized, arg) \ - macro (OMP_barrier, 0, arg) \ - macro (Total_barrier, stats_flags_e::synthesized, arg) \ - macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (TASK_execution, 0, arg) \ - macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ - macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (LAST,0, arg) - - -// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit -// OMP_serial -- thread zero time executing serial code -// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread) -// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed. -// OMP_barrier -- time at "real" barriers -// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread -// FOR_static_scheduling -- time spent doing scheduling for a static "for" -// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for" - -#if (KMP_DEVELOPER_STATS) -// Timers which are of interest tio runtime library developers, not end users. -// THese have to be explicitly enabled in addition to the other stats. - -// KMP_fork_barrier -- time in __kmp_fork_barrier -// KMP_join_barrier -- time in __kmp_join_barrier -// KMP_barrier -- time in __kmp_barrier -// KMP_end_split_barrier -- time in __kmp_end_split_barrier -// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy -// KMP_icv_copy -- start/stop timer for any ICV copying -// KMP_linear_gather -- time in __kmp_linear_barrier_gather -// KMP_linear_release -- time in __kmp_linear_barrier_release -// KMP_tree_gather -- time in __kmp_tree_barrier_gather -// KMP_tree_release -- time in __kmp_tree_barrier_release -// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather -// KMP_hyper_release -- time in __kmp_hyper_barrier_release -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (KMP_fork_call, 0, arg) \ - macro (KMP_join_call, 0, arg) \ - macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_barrier, 0, arg) \ - macro (KMP_end_split_barrier, 0, arg) \ - macro (KMP_hier_gather, 0, arg) \ - macro (KMP_hier_release, 0, arg) \ - macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ - macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ - macro (KMP_linear_gather, 0, arg) \ - macro (KMP_linear_release, 0, arg) \ - macro (KMP_tree_gather, 0, arg) \ - macro (KMP_tree_release, 0, arg) \ - macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_resume, stats_flags_e::logEvent, arg) \ - macro (USER_suspend, stats_flags_e::logEvent, arg) \ - macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ - macro (KMP_allocate_team, 0, arg) \ - macro (KMP_setup_icv_copy, 0, arg) \ - macro (USER_icv_copy, 0, arg) -#else -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) -#endif - -/*! - * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. - * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! - * - * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics). - * We allocate these on a per-thread basis, and explicitly start and stop them. - * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't - * need to be defined here. - * The name here should be the same as that of a timer above. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_start_end, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ - KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ - macro(LAST, 0, arg) - -#if (KMP_DEVELOPER_STATS) -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ - macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) -#else -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) -#endif - -#define ENUMERATE(name,ignore,prefix) prefix##name, -enum timer_e { - KMP_FOREACH_TIMER(ENUMERATE, TIMER_) -}; - -enum explicit_timer_e { - KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) -}; - -enum counter_e { - KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) -}; -#undef ENUMERATE - -class statistic -{ - double minVal; - double maxVal; - double meanVal; - double m2; - uint64_t sampleCount; - - public: - statistic() { reset(); } - statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {} - - double getMin() const { return minVal; } - double getMean() const { return meanVal; } - double getMax() const { return maxVal; } - uint64_t getCount() const { return sampleCount; } - double getSD() const { return sqrt(m2/sampleCount); } - double getTotal() const { return sampleCount*meanVal; } - - void reset() - { - minVal = std::numeric_limits<double>::max(); - maxVal = -std::numeric_limits<double>::max(); - meanVal= 0.0; - m2 = 0.0; - sampleCount = 0; - } - void addSample(double sample); - void scale (double factor); - void scaleDown(double f) { scale (1./f); } - statistic & operator+= (statistic const & other); - - std::string format(char unit, bool total=false) const; -}; - -struct statInfo -{ - const char * name; - uint32_t flags; -}; - -class timeStat : public statistic -{ - static statInfo timerInfo[]; - - public: - timeStat() : statistic() {} - static const char * name(timer_e e) { return timerInfo[e].name; } - static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } - static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } - static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } - static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; } - static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } - static void clearEventFlags() { - int i; - for(i=0;i<TIMER_LAST;i++) { - timerInfo[i].flags &= (~(stats_flags_e::logEvent)); - } - } -}; - -// Where we need explicitly to start and end the timer, this version can be used -// Since these timers normally aren't nicely scoped, so don't have a good place to live -// on the stack of the thread, they're more work to use. -class explicitTimer -{ - timeStat * stat; - tsc_tick_count startTime; - - public: - explicitTimer () : stat(0), startTime(0) { } - explicitTimer (timeStat * s) : stat(s), startTime() { } - - void setStat (timeStat *s) { stat = s; } - void start(timer_e timerEnumValue); - void stop(timer_e timerEnumValue); - void reset() { startTime = 0; } -}; - -// Where all you need is to time a block, this is enough. -// (It avoids the need to have an explicit end, leaving the scope suffices.) -class blockTimer : public explicitTimer -{ - timer_e timerEnumValue; - public: - blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); } - ~blockTimer() { stop(timerEnumValue); } -}; - -// If all you want is a count, then you can use this... -// The individual per-thread counts will be aggregated into a statistic at program exit. -class counter -{ - uint64_t value; - static const statInfo counterInfo[]; - - public: - counter() : value(0) {} - void increment() { value++; } - uint64_t getValue() const { return value; } - void reset() { value = 0; } - static const char * name(counter_e e) { return counterInfo[e].name; } - static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; } -}; - -/* **************************************************************** - Class to implement an event - - There are four components to an event: start time, stop time - nest_level, and timer_name. - The start and stop time should be obvious (recorded in clock ticks). - The nest_level relates to the bar width in the timeline graph. - The timer_name is used to determine which timer event triggered this event. - - the interface to this class is through four read-only operations: - 1) getStart() -- returns the start time as 64 bit integer - 2) getStop() -- returns the stop time as 64 bit integer - 3) getNestLevel() -- returns the nest level of the event - 4) getTimerName() -- returns the timer name that triggered event - - *MORE ON NEST_LEVEL* - The nest level is used in the bar graph that represents the timeline. - Its main purpose is for showing how events are nested inside eachother. - For example, say events, A, B, and C are recorded. If the timeline - looks like this: - -Begin -------------------------------------------------------------> Time - | | | | | | - A B C C B A - start start start end end end - - Then A, B, C will have a nest level of 1, 2, 3 respectively. - These values are then used to calculate the barwidth so you can - see that inside A, B has occurred, and inside B, C has occurred. - Currently, this is shown with A's bar width being larger than B's - bar width, and B's bar width being larger than C's bar width. - -**************************************************************** */ -class kmp_stats_event { - uint64_t start; - uint64_t stop; - int nest_level; - timer_e timer_name; - public: - kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} - kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} - inline uint64_t getStart() const { return start; } - inline uint64_t getStop() const { return stop; } - inline int getNestLevel() const { return nest_level; } - inline timer_e getTimerName() const { return timer_name; } -}; - -/* **************************************************************** - Class to implement a dynamically expandable array of events - - --------------------------------------------------------- - | event 1 | event 2 | event 3 | event 4 | ... | event N | - --------------------------------------------------------- - - An event is pushed onto the back of this array at every - explicitTimer->stop() call. The event records the thread #, - start time, stop time, and nest level related to the bar width. - - The event vector starts at size INIT_SIZE and grows (doubles in size) - if needed. An implication of this behavior is that log(N) - reallocations are needed (where N is number of events). If you want - to avoid reallocations, then set INIT_SIZE to a large value. - - the interface to this class is through six operations: - 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory - 2) size() -- returns the number of valid elements in the vector - 3) push_back(start, stop, nest, timer_name) -- pushes an event onto - the back of the array - 4) deallocate() -- frees all memory associated with the vector - 5) sort() -- sorts the vector by start time - 6) operator[index] or at(index) -- returns event reference at that index - -**************************************************************** */ -class kmp_stats_event_vector { - kmp_stats_event* events; - int internal_size; - int allocated_size; - static const int INIT_SIZE = 1024; - public: - kmp_stats_event_vector() { - events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE); - internal_size = 0; - allocated_size = INIT_SIZE; - } - ~kmp_stats_event_vector() {} - inline void reset() { internal_size = 0; } - inline int size() const { return internal_size; } - void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { - int i; - if(internal_size == allocated_size) { - kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2); - for(i=0;i<internal_size;i++) tmp[i] = events[i]; - __kmp_free(events); - events = tmp; - allocated_size*=2; - } - events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name); - internal_size++; - return; - } - void deallocate(); - void sort(); - const kmp_stats_event & operator[](int index) const { return events[index]; } - kmp_stats_event & operator[](int index) { return events[index]; } - const kmp_stats_event & at(int index) const { return events[index]; } - kmp_stats_event & at(int index) { return events[index]; } -}; - -/* **************************************************************** - Class to implement a doubly-linked, circular, statistics list - - |---| ---> |---| ---> |---| ---> |---| ---> ... next - | | | | | | | | - |---| <--- |---| <--- |---| <--- |---| <--- ... prev - Sentinel first second third - Node node node node - - The Sentinel Node is the user handle on the list. - The first node corresponds to thread 0's statistics. - The second node corresponds to thread 1's statistics and so on... - - Each node has a _timers, _counters, and _explicitTimers array to - hold that thread's statistics. The _explicitTimers - point to the correct _timer and update its statistics at every stop() call. - The explicitTimers' pointers are set up in the constructor. - Each node also has an event vector to hold that thread's timing events. - The event vector expands as necessary and records the start-stop times - for each timer. - - The nestLevel variable is for plotting events and is related - to the bar width in the timeline graph. - - Every thread will have a __thread local pointer to its node in - the list. The sentinel node is used by the master thread to - store "dummy" statistics before __kmp_create_worker() is called. - -**************************************************************** */ -class kmp_stats_list { - int gtid; - timeStat _timers[TIMER_LAST+1]; - counter _counters[COUNTER_LAST+1]; - explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; - int _nestLevel; // one per thread - kmp_stats_event_vector _event_vector; - kmp_stats_list* next; - kmp_stats_list* prev; - public: - kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { -#define doInit(name,ignore1,ignore2) \ - getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); - KMP_FOREACH_EXPLICIT_TIMER(doInit,0); -#undef doInit - } - ~kmp_stats_list() { } - inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } - inline counter * getCounter(counter_e idx) { return &_counters[idx]; } - inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } - inline timeStat * getTimers() { return _timers; } - inline counter * getCounters() { return _counters; } - inline explicitTimer * getExplicitTimers() { return _explicitTimers; } - inline kmp_stats_event_vector & getEventVector() { return _event_vector; } - inline void resetEventVector() { _event_vector.reset(); } - inline void incrementNestValue() { _nestLevel++; } - inline int getNestValue() { return _nestLevel; } - inline void decrementNestValue() { _nestLevel--; } - inline int getGtid() const { return gtid; } - inline void setGtid(int newgtid) { gtid = newgtid; } - kmp_stats_list* push_back(int gtid); // returns newly created list node - inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { - _event_vector.push_back(start_time, stop_time, nest_level, name); - } - void deallocate(); - class iterator; - kmp_stats_list::iterator begin(); - kmp_stats_list::iterator end(); - int size(); - class iterator { - kmp_stats_list* ptr; - friend kmp_stats_list::iterator kmp_stats_list::begin(); - friend kmp_stats_list::iterator kmp_stats_list::end(); - public: - iterator(); - ~iterator(); - iterator operator++(); - iterator operator++(int dummy); - iterator operator--(); - iterator operator--(int dummy); - bool operator!=(const iterator & rhs); - bool operator==(const iterator & rhs); - kmp_stats_list* operator*() const; // dereference operator - }; -}; - -/* **************************************************************** - Class to encapsulate all output functions and the environment variables - - This module holds filenames for various outputs (normal stats, events, plot file), - as well as coloring information for the plot file. - - The filenames and flags variables are read from environment variables. - These are read once by the constructor of the global variable __kmp_stats_output - which calls init(). - - During this init() call, event flags for the timeStat::timerInfo[] global array - are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). - - The only interface function that is public is outputStats(heading). This function - should print out everything it needs to, either to files or stderr, - depending on the environment variables described below - - ENVIRONMENT VARIABLES: - KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file, - otherwise, print to stderr - KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either - KMP_STATS_FILE or stderr - KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, - otherwise, the plot file is sent to "events.plt" - KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events - KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, - otherwise, output is sent to "events.dat" - -**************************************************************** */ -class kmp_stats_output_module { - - public: - struct rgb_color { - float r; - float g; - float b; - }; - - private: - static const char* outputFileName; - static const char* eventsFileName; - static const char* plotFileName; - static int printPerThreadFlag; - static int printPerThreadEventsFlag; - static const rgb_color globalColorArray[]; - static rgb_color timerColorInfo[]; - - void init(); - static void setupEventColors(); - static void printPloticusFile(); - static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers); - static void printCounters(FILE * statsOut, counter const * theCounters); - static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); - static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } - static void windupExplicitTimers(); - bool eventPrintingEnabled() { - if(printPerThreadEventsFlag) return true; - else return false; - } - bool perThreadPrintingEnabled() { - if(printPerThreadFlag) return true; - else return false; - } - - public: - kmp_stats_output_module() { init(); } - void outputStats(const char* heading); -}; - -#ifdef __cplusplus -extern "C" { -#endif -void __kmp_stats_init(); -void __kmp_reset_stats(); -void __kmp_output_stats(const char *); -void __kmp_accumulate_stats_at_exit(void); -// thread local pointer to stats node within list -extern __thread kmp_stats_list* __kmp_stats_thread_ptr; -// head to stats list. -extern kmp_stats_list __kmp_stats_list; -// lock for __kmp_stats_list -extern kmp_tas_lock_t __kmp_stats_lock; -// reference start time -extern tsc_tick_count __kmp_stats_start_time; -// interface to output -extern kmp_stats_output_module __kmp_stats_output; - -#ifdef __cplusplus -} -#endif - -// Simple, standard interfaces that drop out completely if stats aren't enabled - - -/*! - * \brief Uses specified timer (name) to time code block. - * - * @param name timer name as specified under the KMP_FOREACH_TIMER() macro - * - * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block - * and use the destructor to stop the timer. Convenient! - * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block. - * I don't think that's a problem. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_TIME_BLOCK(name) \ - blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) - -/*! - * \brief Adds value to specified timer (name). - * - * @param name timer name as specified under the KMP_FOREACH_TIMER() macro - * @param value double precision sample value to add to statistics for the timer - * - * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_VALUE(name, value) \ - __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) - -/*! - * \brief Increments specified counter (name). - * - * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro - * - * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_BLOCK(name) \ - __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() - -/*! - * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro. - * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro - * - * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER() - * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end - * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_START_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) - -/*! - * \brief "Stops" an explicit timer. - * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro - * - * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name) - * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset. - * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_STOP_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) - -/*! - * \brief Outputs the current thread statistics and reset them. - * - * @param heading_string heading put above the final stats output - * - * \details Explicitly stops all timers and outputs all stats. - * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr - * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats - * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread - * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed - * It should be noted that all statistics are reset when this macro is called. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_OUTPUT_STATS(heading_string) \ - __kmp_output_stats(heading_string) - -/*! - * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) - * - * \details Reset all stats for all threads. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_RESET_STATS() __kmp_reset_stats() - -#if (KMP_DEVELOPER_STATS) -# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) -# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) -#else -// Null definitions -# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#endif - -#else // KMP_STATS_ENABLED - -// Null definitions -#define KMP_TIME_BLOCK(n) ((void)0) -#define KMP_COUNT_VALUE(n,v) ((void)0) -#define KMP_COUNT_BLOCK(n) ((void)0) -#define KMP_START_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) - -#define KMP_OUTPUT_STATS(heading_string) ((void)0) -#define KMP_RESET_STATS() ((void)0) - -#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#endif // KMP_STATS_ENABLED - -#endif // KMP_STATS_H +#ifndef KMP_STATS_H +#define KMP_STATS_H + +/** @file kmp_stats.h + * Functions for collecting statistics. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "kmp_config.h" + +#if KMP_STATS_ENABLED +/* + * Statistics accumulator. + * Accumulates number of samples and computes min, max, mean, standard deviation on the fly. + * + * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm + */ + +#include <limits> +#include <math.h> +#include <string> +#include <stdint.h> +#include <new> // placement new +#include "kmp_stats_timing.h" + +/* + * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and + * are intended for the runtime library developer. + */ +// #define KMP_DEVELOPER_STATS 1 + +/*! + * @ingroup STATS_GATHERING + * \brief flags to describe the statistic ( timers or counter ) + * +*/ +class stats_flags_e { + public: + const static int onlyInMaster = 1<<0; //!< statistic is valid only for master + const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output + const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function + const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads + const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) +}; + +/*! + * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h + * + * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \details A counter counts the occurrence of some event. + * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread + * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement). + * The min,mean,max are therefore the values for the threads. + * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do. + * All of the tables and printing is generated from this macro. + * Format is "macro(name, flags, arg)" + * + * @ingroup STATS_GATHERING +*/ +#define KMP_FOREACH_COUNTER(macro, arg) \ + macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_NESTED_PARALLEL, 0, arg) \ + macro (OMP_FOR_static, 0, arg) \ + macro (OMP_FOR_dynamic, 0, arg) \ + macro (OMP_DISTRIBUTE, 0, arg) \ + macro (OMP_BARRIER, 0, arg) \ + macro (OMP_CRITICAL,0, arg) \ + macro (OMP_SINGLE, 0, arg) \ + macro (OMP_MASTER, 0, arg) \ + macro (OMP_TEAMS, 0, arg) \ + macro (OMP_set_lock, 0, arg) \ + macro (OMP_test_lock, 0, arg) \ + macro (REDUCE_wait, 0, arg) \ + macro (REDUCE_nowait, 0, arg) \ + macro (OMP_TASKYIELD, 0, arg) \ + macro (TASK_executed, 0, arg) \ + macro (TASK_cancelled, 0, arg) \ + macro (TASK_stolen, 0, arg) \ + macro (LAST,0,arg) + +// OMP_PARALLEL_args -- the number of arguments passed to a fork +// FOR_static_iterations -- Number of available parallel chunks of work in a static for +// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for +// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2. + +/*! + * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h + * + * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads. + * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork" + * as well). + * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level. + * Format is "macro(name, flags, arg)" + * + * @ingroup STATS_GATHERING2 + */ +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_work, 0, arg) \ + macro (Total_work, stats_flags_e::synthesized, arg) \ + macro (OMP_barrier, 0, arg) \ + macro (Total_barrier, stats_flags_e::synthesized, arg) \ + macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (TASK_execution, 0, arg) \ + macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ + macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (LAST,0, arg) + + +// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit +// OMP_serial -- thread zero time executing serial code +// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread) +// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed. +// OMP_barrier -- time at "real" barriers +// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread +// FOR_static_scheduling -- time spent doing scheduling for a static "for" +// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for" + +#if (KMP_DEVELOPER_STATS) +// Timers which are of interest tio runtime library developers, not end users. +// THese have to be explicitly enabled in addition to the other stats. + +// KMP_fork_barrier -- time in __kmp_fork_barrier +// KMP_join_barrier -- time in __kmp_join_barrier +// KMP_barrier -- time in __kmp_barrier +// KMP_end_split_barrier -- time in __kmp_end_split_barrier +// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy +// KMP_icv_copy -- start/stop timer for any ICV copying +// KMP_linear_gather -- time in __kmp_linear_barrier_gather +// KMP_linear_release -- time in __kmp_linear_barrier_release +// KMP_tree_gather -- time in __kmp_tree_barrier_gather +// KMP_tree_release -- time in __kmp_tree_barrier_release +// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather +// KMP_hyper_release -- time in __kmp_hyper_barrier_release +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (KMP_fork_call, 0, arg) \ + macro (KMP_join_call, 0, arg) \ + macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ + macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ + macro (KMP_barrier, 0, arg) \ + macro (KMP_end_split_barrier, 0, arg) \ + macro (KMP_hier_gather, 0, arg) \ + macro (KMP_hier_release, 0, arg) \ + macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ + macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ + macro (KMP_linear_gather, 0, arg) \ + macro (KMP_linear_release, 0, arg) \ + macro (KMP_tree_gather, 0, arg) \ + macro (KMP_tree_release, 0, arg) \ + macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ + macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ + macro (USER_resume, stats_flags_e::logEvent, arg) \ + macro (USER_suspend, stats_flags_e::logEvent, arg) \ + macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ + macro (KMP_allocate_team, 0, arg) \ + macro (KMP_setup_icv_copy, 0, arg) \ + macro (USER_icv_copy, 0, arg) +#else +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) +#endif + +/*! + * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. + * + * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! + * + * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics). + * We allocate these on a per-thread basis, and explicitly start and stop them. + * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't + * need to be defined here. + * The name here should be the same as that of a timer above. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + macro(OMP_serial, 0, arg) \ + macro(OMP_start_end, 0, arg) \ + macro(OMP_single, 0, arg) \ + macro(OMP_master, 0, arg) \ + KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ + macro(LAST, 0, arg) + +#if (KMP_DEVELOPER_STATS) +# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ + macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) +#else +# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) +#endif + +#define ENUMERATE(name,ignore,prefix) prefix##name, +enum timer_e { + KMP_FOREACH_TIMER(ENUMERATE, TIMER_) +}; + +enum explicit_timer_e { + KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) +}; + +enum counter_e { + KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) +}; +#undef ENUMERATE + +class statistic +{ + double minVal; + double maxVal; + double meanVal; + double m2; + uint64_t sampleCount; + + public: + statistic() { reset(); } + statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {} + + double getMin() const { return minVal; } + double getMean() const { return meanVal; } + double getMax() const { return maxVal; } + uint64_t getCount() const { return sampleCount; } + double getSD() const { return sqrt(m2/sampleCount); } + double getTotal() const { return sampleCount*meanVal; } + + void reset() + { + minVal = std::numeric_limits<double>::max(); + maxVal = -std::numeric_limits<double>::max(); + meanVal= 0.0; + m2 = 0.0; + sampleCount = 0; + } + void addSample(double sample); + void scale (double factor); + void scaleDown(double f) { scale (1./f); } + statistic & operator+= (statistic const & other); + + std::string format(char unit, bool total=false) const; +}; + +struct statInfo +{ + const char * name; + uint32_t flags; +}; + +class timeStat : public statistic +{ + static statInfo timerInfo[]; + + public: + timeStat() : statistic() {} + static const char * name(timer_e e) { return timerInfo[e].name; } + static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } + static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } + static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } + static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; } + static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } + static void clearEventFlags() { + int i; + for(i=0;i<TIMER_LAST;i++) { + timerInfo[i].flags &= (~(stats_flags_e::logEvent)); + } + } +}; + +// Where we need explicitly to start and end the timer, this version can be used +// Since these timers normally aren't nicely scoped, so don't have a good place to live +// on the stack of the thread, they're more work to use. +class explicitTimer +{ + timeStat * stat; + tsc_tick_count startTime; + + public: + explicitTimer () : stat(0), startTime(0) { } + explicitTimer (timeStat * s) : stat(s), startTime() { } + + void setStat (timeStat *s) { stat = s; } + void start(timer_e timerEnumValue); + void stop(timer_e timerEnumValue); + void reset() { startTime = 0; } +}; + +// Where all you need is to time a block, this is enough. +// (It avoids the need to have an explicit end, leaving the scope suffices.) +class blockTimer : public explicitTimer +{ + timer_e timerEnumValue; + public: + blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); } + ~blockTimer() { stop(timerEnumValue); } +}; + +// If all you want is a count, then you can use this... +// The individual per-thread counts will be aggregated into a statistic at program exit. +class counter +{ + uint64_t value; + static const statInfo counterInfo[]; + + public: + counter() : value(0) {} + void increment() { value++; } + uint64_t getValue() const { return value; } + void reset() { value = 0; } + static const char * name(counter_e e) { return counterInfo[e].name; } + static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; } +}; + +/* **************************************************************** + Class to implement an event + + There are four components to an event: start time, stop time + nest_level, and timer_name. + The start and stop time should be obvious (recorded in clock ticks). + The nest_level relates to the bar width in the timeline graph. + The timer_name is used to determine which timer event triggered this event. + + the interface to this class is through four read-only operations: + 1) getStart() -- returns the start time as 64 bit integer + 2) getStop() -- returns the stop time as 64 bit integer + 3) getNestLevel() -- returns the nest level of the event + 4) getTimerName() -- returns the timer name that triggered event + + *MORE ON NEST_LEVEL* + The nest level is used in the bar graph that represents the timeline. + Its main purpose is for showing how events are nested inside eachother. + For example, say events, A, B, and C are recorded. If the timeline + looks like this: + +Begin -------------------------------------------------------------> Time + | | | | | | + A B C C B A + start start start end end end + + Then A, B, C will have a nest level of 1, 2, 3 respectively. + These values are then used to calculate the barwidth so you can + see that inside A, B has occurred, and inside B, C has occurred. + Currently, this is shown with A's bar width being larger than B's + bar width, and B's bar width being larger than C's bar width. + +**************************************************************** */ +class kmp_stats_event { + uint64_t start; + uint64_t stop; + int nest_level; + timer_e timer_name; + public: + kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} + kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} + inline uint64_t getStart() const { return start; } + inline uint64_t getStop() const { return stop; } + inline int getNestLevel() const { return nest_level; } + inline timer_e getTimerName() const { return timer_name; } +}; + +/* **************************************************************** + Class to implement a dynamically expandable array of events + + --------------------------------------------------------- + | event 1 | event 2 | event 3 | event 4 | ... | event N | + --------------------------------------------------------- + + An event is pushed onto the back of this array at every + explicitTimer->stop() call. The event records the thread #, + start time, stop time, and nest level related to the bar width. + + The event vector starts at size INIT_SIZE and grows (doubles in size) + if needed. An implication of this behavior is that log(N) + reallocations are needed (where N is number of events). If you want + to avoid reallocations, then set INIT_SIZE to a large value. + + the interface to this class is through six operations: + 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory + 2) size() -- returns the number of valid elements in the vector + 3) push_back(start, stop, nest, timer_name) -- pushes an event onto + the back of the array + 4) deallocate() -- frees all memory associated with the vector + 5) sort() -- sorts the vector by start time + 6) operator[index] or at(index) -- returns event reference at that index + +**************************************************************** */ +class kmp_stats_event_vector { + kmp_stats_event* events; + int internal_size; + int allocated_size; + static const int INIT_SIZE = 1024; + public: + kmp_stats_event_vector() { + events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE); + internal_size = 0; + allocated_size = INIT_SIZE; + } + ~kmp_stats_event_vector() {} + inline void reset() { internal_size = 0; } + inline int size() const { return internal_size; } + void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { + int i; + if(internal_size == allocated_size) { + kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2); + for(i=0;i<internal_size;i++) tmp[i] = events[i]; + __kmp_free(events); + events = tmp; + allocated_size*=2; + } + events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name); + internal_size++; + return; + } + void deallocate(); + void sort(); + const kmp_stats_event & operator[](int index) const { return events[index]; } + kmp_stats_event & operator[](int index) { return events[index]; } + const kmp_stats_event & at(int index) const { return events[index]; } + kmp_stats_event & at(int index) { return events[index]; } +}; + +/* **************************************************************** + Class to implement a doubly-linked, circular, statistics list + + |---| ---> |---| ---> |---| ---> |---| ---> ... next + | | | | | | | | + |---| <--- |---| <--- |---| <--- |---| <--- ... prev + Sentinel first second third + Node node node node + + The Sentinel Node is the user handle on the list. + The first node corresponds to thread 0's statistics. + The second node corresponds to thread 1's statistics and so on... + + Each node has a _timers, _counters, and _explicitTimers array to + hold that thread's statistics. The _explicitTimers + point to the correct _timer and update its statistics at every stop() call. + The explicitTimers' pointers are set up in the constructor. + Each node also has an event vector to hold that thread's timing events. + The event vector expands as necessary and records the start-stop times + for each timer. + + The nestLevel variable is for plotting events and is related + to the bar width in the timeline graph. + + Every thread will have a __thread local pointer to its node in + the list. The sentinel node is used by the master thread to + store "dummy" statistics before __kmp_create_worker() is called. + +**************************************************************** */ +class kmp_stats_list { + int gtid; + timeStat _timers[TIMER_LAST+1]; + counter _counters[COUNTER_LAST+1]; + explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; + int _nestLevel; // one per thread + kmp_stats_event_vector _event_vector; + kmp_stats_list* next; + kmp_stats_list* prev; + public: + kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { +#define doInit(name,ignore1,ignore2) \ + getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); + KMP_FOREACH_EXPLICIT_TIMER(doInit,0); +#undef doInit + } + ~kmp_stats_list() { } + inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } + inline counter * getCounter(counter_e idx) { return &_counters[idx]; } + inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } + inline timeStat * getTimers() { return _timers; } + inline counter * getCounters() { return _counters; } + inline explicitTimer * getExplicitTimers() { return _explicitTimers; } + inline kmp_stats_event_vector & getEventVector() { return _event_vector; } + inline void resetEventVector() { _event_vector.reset(); } + inline void incrementNestValue() { _nestLevel++; } + inline int getNestValue() { return _nestLevel; } + inline void decrementNestValue() { _nestLevel--; } + inline int getGtid() const { return gtid; } + inline void setGtid(int newgtid) { gtid = newgtid; } + kmp_stats_list* push_back(int gtid); // returns newly created list node + inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { + _event_vector.push_back(start_time, stop_time, nest_level, name); + } + void deallocate(); + class iterator; + kmp_stats_list::iterator begin(); + kmp_stats_list::iterator end(); + int size(); + class iterator { + kmp_stats_list* ptr; + friend kmp_stats_list::iterator kmp_stats_list::begin(); + friend kmp_stats_list::iterator kmp_stats_list::end(); + public: + iterator(); + ~iterator(); + iterator operator++(); + iterator operator++(int dummy); + iterator operator--(); + iterator operator--(int dummy); + bool operator!=(const iterator & rhs); + bool operator==(const iterator & rhs); + kmp_stats_list* operator*() const; // dereference operator + }; +}; + +/* **************************************************************** + Class to encapsulate all output functions and the environment variables + + This module holds filenames for various outputs (normal stats, events, plot file), + as well as coloring information for the plot file. + + The filenames and flags variables are read from environment variables. + These are read once by the constructor of the global variable __kmp_stats_output + which calls init(). + + During this init() call, event flags for the timeStat::timerInfo[] global array + are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). + + The only interface function that is public is outputStats(heading). This function + should print out everything it needs to, either to files or stderr, + depending on the environment variables described below + + ENVIRONMENT VARIABLES: + KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file, + otherwise, print to stderr + KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either + KMP_STATS_FILE or stderr + KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, + otherwise, the plot file is sent to "events.plt" + KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events + KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, + otherwise, output is sent to "events.dat" + +**************************************************************** */ +class kmp_stats_output_module { + + public: + struct rgb_color { + float r; + float g; + float b; + }; + + private: + static const char* outputFileName; + static const char* eventsFileName; + static const char* plotFileName; + static int printPerThreadFlag; + static int printPerThreadEventsFlag; + static const rgb_color globalColorArray[]; + static rgb_color timerColorInfo[]; + + void init(); + static void setupEventColors(); + static void printPloticusFile(); + static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers); + static void printCounters(FILE * statsOut, counter const * theCounters); + static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); + static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } + static void windupExplicitTimers(); + bool eventPrintingEnabled() { + if(printPerThreadEventsFlag) return true; + else return false; + } + bool perThreadPrintingEnabled() { + if(printPerThreadFlag) return true; + else return false; + } + + public: + kmp_stats_output_module() { init(); } + void outputStats(const char* heading); +}; + +#ifdef __cplusplus +extern "C" { +#endif +void __kmp_stats_init(); +void __kmp_reset_stats(); +void __kmp_output_stats(const char *); +void __kmp_accumulate_stats_at_exit(void); +// thread local pointer to stats node within list +extern __thread kmp_stats_list* __kmp_stats_thread_ptr; +// head to stats list. +extern kmp_stats_list __kmp_stats_list; +// lock for __kmp_stats_list +extern kmp_tas_lock_t __kmp_stats_lock; +// reference start time +extern tsc_tick_count __kmp_stats_start_time; +// interface to output +extern kmp_stats_output_module __kmp_stats_output; + +#ifdef __cplusplus +} +#endif + +// Simple, standard interfaces that drop out completely if stats aren't enabled + + +/*! + * \brief Uses specified timer (name) to time code block. + * + * @param name timer name as specified under the KMP_FOREACH_TIMER() macro + * + * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block + * and use the destructor to stop the timer. Convenient! + * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block. + * I don't think that's a problem. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_TIME_BLOCK(name) \ + blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) + +/*! + * \brief Adds value to specified timer (name). + * + * @param name timer name as specified under the KMP_FOREACH_TIMER() macro + * @param value double precision sample value to add to statistics for the timer + * + * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_COUNT_VALUE(name, value) \ + __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) + +/*! + * \brief Increments specified counter (name). + * + * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro + * + * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_COUNT_BLOCK(name) \ + __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() + +/*! + * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro. + * + * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * + * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER() + * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end + * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_START_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) + +/*! + * \brief "Stops" an explicit timer. + * + * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * + * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name) + * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset. + * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_STOP_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) + +/*! + * \brief Outputs the current thread statistics and reset them. + * + * @param heading_string heading put above the final stats output + * + * \details Explicitly stops all timers and outputs all stats. + * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr + * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats + * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread + * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed + * It should be noted that all statistics are reset when this macro is called. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_OUTPUT_STATS(heading_string) \ + __kmp_output_stats(heading_string) + +/*! + * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) + * + * \details Reset all stats for all threads. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_RESET_STATS() __kmp_reset_stats() + +#if (KMP_DEVELOPER_STATS) +# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) +# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) +# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) +# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) +# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) +#else +// Null definitions +# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) +# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#endif + +#else // KMP_STATS_ENABLED + +// Null definitions +#define KMP_TIME_BLOCK(n) ((void)0) +#define KMP_COUNT_VALUE(n,v) ((void)0) +#define KMP_COUNT_BLOCK(n) ((void)0) +#define KMP_START_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) + +#define KMP_OUTPUT_STATS(heading_string) ((void)0) +#define KMP_RESET_STATS() ((void)0) + +#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) +#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#endif // KMP_STATS_ENABLED + +#endif // KMP_STATS_H |