//===--------------------- TimelineView.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements a timeline view for the llvm-mca tool.
///
/// Class TimelineView observes events generated by the pipeline. For every
/// instruction executed by the pipeline, it stores information related to
/// state transition. It then plots that information in the form of a table
/// as reported by the example below:
///
/// Timeline view:
///                     0123456
/// Index     0123456789
///
/// [0,0]     DeER .    .    ..   vmovshdup  %xmm0, %xmm1
/// [0,1]     DeER .    .    ..   vpermilpd  $1, %xmm0, %xmm2
/// [0,2]     .DeER.    .    ..   vpermilps  $231, %xmm0, %xmm5
/// [0,3]     .DeeeER   .    ..   vaddss     %xmm1, %xmm0, %xmm3
/// [0,4]     . D==eeeER.    ..   vaddss     %xmm3, %xmm2, %xmm4
/// [0,5]     . D=====eeeER  ..   vaddss     %xmm4, %xmm5, %xmm6
///
/// [1,0]     .  DeE------R  ..   vmovshdup  %xmm0, %xmm1
/// [1,1]     .  DeE------R  ..   vpermilpd  $1, %xmm0, %xmm2
/// [1,2]     .   DeE-----R  ..   vpermilps  $231, %xmm0, %xmm5
/// [1,3]     .   D=eeeE--R  ..   vaddss     %xmm1, %xmm0, %xmm3
/// [1,4]     .    D===eeeER ..   vaddss     %xmm3, %xmm2, %xmm4
/// [1,5]     .    D======eeeER   vaddss     %xmm4, %xmm5, %xmm6
///
/// There is an entry for every instruction in the input assembly sequence.
/// The first field is a pair of numbers obtained from the instruction index.
/// The first element of the pair is the iteration index, while the second
/// element of the pair is a sequence number (i.e. a position in the assembly
/// sequence).
/// The second field of the table is the actual timeline information; each
/// column is the information related to a specific cycle of execution.
/// The timeline of an instruction is described by a sequence of characters
/// where each character represents the instruction state at a specific cycle.
///
/// Possible instruction states are:
/// D: Instruction Dispatched
/// e: Instruction Executing
/// E: Instruction Executed (write-back stage)
/// R: Instruction retired
/// =: Instruction waiting in the Scheduler's queue
/// -: Instruction executed, waiting to retire in order.
///
/// dots ('.') and empty spaces are cycles where the instruction is not
/// in-flight.
///
/// The last column is the assembly instruction associated to the entry.
///
/// Based on the timeline view information from the example, instruction 0
/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3.
/// Instruction [0,1] was also dispatched at cycle 0, and it retired at
/// the same cycle as instruction [0,0].
/// Instruction [0,4] has been dispatched at cycle 2. However, it had to
/// wait for two cycles before being issued. That is because operands
/// became ready only at cycle 5.
///
/// This view helps to further understand bottlenecks and the impact of
/// resource pressure on the code.
///
/// To better understand why instructions had to wait for multiple cycles in
/// the scheduler's queue, class TimelineView also reports extra timing info
/// in another table named "Average Wait times" (see example below).
///
///
/// Average Wait times (based on the timeline view):
/// [0]: Executions
/// [1]: Average time spent waiting in a scheduler's queue
/// [2]: Average time spent waiting in a scheduler's queue while ready
/// [3]: Average time elapsed from WB until retire stage
///
///       [0]    [1]    [2]    [3]
/// 0.     2     1.0    1.0    3.0       vmovshdup  %xmm0, %xmm1
/// 1.     2     1.0    1.0    3.0       vpermilpd  $1, %xmm0, %xmm2
/// 2.     2     1.0    1.0    2.5       vpermilps  $231, %xmm0, %xmm5
/// 3.     2     1.5    0.5    1.0       vaddss     %xmm1, %xmm0, %xmm3
/// 4.     2     3.5    0.0    0.0       vaddss     %xmm3, %xmm2, %xmm4
/// 5.     2     6.5    0.0    0.0       vaddss     %xmm4, %xmm5, %xmm6
///        2     2.4    0.6    1.6       <total>
///
/// By comparing column [2] with column [1], we get an idea about how many
/// cycles were spent in the scheduler's queue due to data dependencies.
///
/// In this example, instruction 5 spent an average of ~6 cycles in the
/// scheduler's queue. As soon as operands became ready, the instruction
/// was immediately issued to the pipeline(s).
/// That is expected because instruction 5 cannot transition to the "ready"
/// state until %xmm4 is written by instruction 4.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
#include "Views/InstructionView.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace mca {
/// This class listens to instruction state transition events
/// in order to construct a timeline information.
///
/// For every instruction executed by the Pipeline, this class constructs
/// a TimelineViewEntry object. TimelineViewEntry objects are then used
/// to print the timeline information, as well as the "average wait times"
/// for every instruction in the input assembly sequence.
///
/// Only the interface is declared here; apart from the small inline methods
/// below, the constructor, event handlers and printers are defined out of
/// line.
class TimelineView : public InstructionView {
// Cycle counter; advanced by one on every onCycleEnd() notification.
unsigned CurrentCycle;
// NOTE(review): MaxCycle and LastCycle are only written by the out-of-line
// implementation. Presumably MaxCycle bounds how many cycles are reported and
// LastCycle is the last cycle that had a recorded event -- confirm there.
unsigned MaxCycle;
unsigned LastCycle;
// Per-instruction record of the cycle at which each state transition
// happened (one field per pipeline state shown in the timeline).
struct TimelineViewEntry {
int CycleDispatched; // A negative value is an "invalid cycle".
unsigned CycleReady;
unsigned CycleIssued;
unsigned CycleExecuted;
unsigned CycleRetired;
};
// Storage for the timeline entries.
// NOTE(review): the indexing scheme (per dynamic instruction vs. per source
// instruction) is decided by the implementation -- confirm there.
std::vector<TimelineViewEntry> Timeline;
// Cycle counts behind the "Average Wait times" report.
// NOTE(review): these look like running totals that are divided by the
// `Executions` argument of printWaitTimeEntry() to obtain columns [1], [2]
// and [3] of that report -- confirm against the implementation.
struct WaitTimeEntry {
unsigned CyclesSpentInSchedulerQueue;
unsigned CyclesSpentInSQWhileReady;
unsigned CyclesSpentAfterWBAndBeforeRetire;
};
// Storage for the wait-time statistics.
std::vector<WaitTimeEntry> WaitTime;
// This field is used to map instructions to buffered resources.
// Elements of this vector are <resourceID, BufferSize> pairs.
std::vector<std::pair<unsigned, int>> UsedBuffer;
// Private printing helper: emits the timeline row described by entry `E`.
// `Iteration` and `SourceIndex` identify the instruction being printed
// (cf. the "[iteration,index]" pair in the example of the file header).
void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
const TimelineViewEntry &E, unsigned Iteration,
unsigned SourceIndex) const;
// Private printing helper: emits the wait-time row for entry `E`.
// `Index` is the row's instruction index and `Executions` the number of
// executions reported in column [0] (presumably also the averaging divisor).
void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
const WaitTimeEntry &E, unsigned Index,
unsigned Executions) const;
// Display characters for the TimelineView report output.
// They match the legend of instruction states given in the file header.
struct DisplayChar {
static const char Dispatched = 'D';
static const char Executed = 'E';
static const char Retired = 'R';
static const char Waiting = '='; // Instruction is waiting in the scheduler.
static const char Executing = 'e';
static const char RetireLag = '-'; // The instruction is waiting to retire.
};
public:
// Builds a view over the instruction sequence `S`.
// `sti` and `Printer` are the subtarget info and instruction printer
// supplied by the caller.
// NOTE(review): the exact meaning of `Iterations` and `Cycles` (e.g. whether
// they are limits on what gets recorded/printed) is defined by the
// out-of-line constructor -- confirm there.
TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
unsigned Cycles);
// Event handlers.
// End-of-cycle notification: simply moves the cycle counter forward.
void onCycleEnd() override { ++CurrentCycle; }
// Instruction event notification; handled out of line.
void onEvent(const HWInstructionEvent &Event) override;
// Notification that `IR` reserved the buffered resources listed in
// `Buffers`; handled out of line.
void onReservedBuffers(const InstRef &IR,
llvm::ArrayRef<unsigned> Buffers) override;
// print functionalities.
// Prints the timeline table to `OS`.
void printTimeline(llvm::raw_ostream &OS) const;
// Prints the "Average Wait times" table to `OS`.
void printAverageWaitTimes(llvm::raw_ostream &OS) const;
// Prints the whole view: the timeline first, then the average wait times.
void printView(llvm::raw_ostream &OS) const override {
printTimeline(OS);
printAverageWaitTimes(OS);
}
// Name under which this view identifies itself.
StringRef getNameAsString() const override { return "TimelineView"; }
// JSON representation of this view; defined out of line.
json::Value toJSON() const override;
};
} // namespace mca
} // namespace llvm
#endif