diff options
author | vvvv <vvvv@ydb.tech> | 2024-02-06 20:01:22 +0300 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:18:27 +0300 |
commit | ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193 (patch) | |
tree | 102765416c3866bde98a82facc7752d329ee0226 /contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp | |
parent | 7494ca32d3a5aca00b7ac527b5f127989335102c (diff) | |
download | ydb-ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193.tar.gz |
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp')
-rw-r--r-- | contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp new file mode 100644 index 0000000000..5c05edbdea --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp @@ -0,0 +1,328 @@ +//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements the TimelineView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/TimelineView.h" +#include <numeric> + +namespace llvm { +namespace mca { + +TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, + unsigned Cycles) + : InstructionView(sti, Printer, S), CurrentCycle(0), + MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles), + LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) { + unsigned NumInstructions = getSource().size(); + assert(Iterations && "Invalid number of iterations specified!"); + NumInstructions *= Iterations; + Timeline.resize(NumInstructions); + TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0}; + std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); + + WaitTimeEntry NullWTEntry = {0, 0, 0}; + std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); + + std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, + /* unknown buffer size */ -1}; + std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry); +} + +void TimelineView::onReservedBuffers(const InstRef &IR, + ArrayRef<unsigned> Buffers) { + if (IR.getSourceIndex() >= getSource().size()) + return; + + const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); + std::pair<unsigned, int> BufferInfo = {0, -1}; + for (const unsigned Buffer : Buffers) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer); + if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { + BufferInfo.first = Buffer; + BufferInfo.second = MCDesc.BufferSize; + } + } + + UsedBuffer[IR.getSourceIndex()] = BufferInfo; +} + +void TimelineView::onEvent(const HWInstructionEvent &Event) { + const unsigned Index = Event.IR.getSourceIndex(); + if (Index >= Timeline.size()) + return; + + switch (Event.Type) { + case HWInstructionEvent::Retired: { + TimelineViewEntry &TVEntry = Timeline[Index]; + if (CurrentCycle < MaxCycle) + TVEntry.CycleRetired = CurrentCycle; + + // Update the WaitTime entry which corresponds to this Index. + assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!"); + unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); + WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()]; + WTEntry.CyclesSpentInSchedulerQueue += + TVEntry.CycleIssued - CycleDispatched; + assert(CycleDispatched <= TVEntry.CycleReady && + "Instruction cannot be ready if it hasn't been dispatched yet!"); + WTEntry.CyclesSpentInSQWhileReady += + TVEntry.CycleIssued - TVEntry.CycleReady; + if (CurrentCycle > TVEntry.CycleExecuted) { + WTEntry.CyclesSpentAfterWBAndBeforeRetire += + (CurrentCycle - 1) - TVEntry.CycleExecuted; + } + break; + } + case HWInstructionEvent::Ready: + Timeline[Index].CycleReady = CurrentCycle; + break; + case HWInstructionEvent::Issued: + Timeline[Index].CycleIssued = CurrentCycle; + break; + case HWInstructionEvent::Executed: + Timeline[Index].CycleExecuted = CurrentCycle; + break; + case HWInstructionEvent::Dispatched: + // There may be multiple dispatch events. Microcoded instructions that are + // expanded into multiple uOps may require multiple dispatch cycles. Here, + // we want to capture the first dispatch cycle. + if (Timeline[Index].CycleDispatched == -1) + Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); + break; + default: + return; + } + if (CurrentCycle < MaxCycle) + LastCycle = std::max(LastCycle, CurrentCycle); +} + +static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, + unsigned Executions, int BufferSize) { + if (CumulativeCycles && BufferSize < 0) + return raw_ostream::MAGENTA; + unsigned Size = static_cast<unsigned>(BufferSize); + if (CumulativeCycles >= Size * Executions) + return raw_ostream::RED; + if ((CumulativeCycles * 2) >= Size * Executions) + return raw_ostream::YELLOW; + return raw_ostream::SAVEDCOLOR; +} + +static void tryChangeColor(raw_ostream &OS, unsigned Cycles, + unsigned Executions, int BufferSize) { + if (!OS.has_colors()) + return; + + raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize); + if (Color == raw_ostream::SAVEDCOLOR) { + OS.resetColor(); + return; + } + OS.changeColor(Color, /* bold */ true, /* BG */ false); +} + +void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, + const WaitTimeEntry &Entry, + unsigned SourceIndex, + unsigned Executions) const { + bool PrintingTotals = SourceIndex == getSource().size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + + OS.PadToColumn(7); + + double AverageTime1, AverageTime2, AverageTime3; + AverageTime1 = + (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; + AverageTime2 = + (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; + AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / + CumulativeExecutions; + + OS << Executions; + OS.PadToColumn(13); + + int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor(AverageTime1 + 0.5) / 10); + OS.PadToColumn(20); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor(AverageTime2 + 0.5) / 10); + OS.PadToColumn(27); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, + getSubTargetInfo().getSchedModel().MicroOpBufferSize); + OS << format("%.1f", floor(AverageTime3 + 0.5) / 10); + + if (OS.has_colors()) + OS.resetColor(); + OS.PadToColumn(34); +} + +void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { + std::string Header = + "\n\nAverage Wait times (based on the timeline view):\n" + "[0]: Executions\n" + "[1]: Average time spent waiting in a scheduler's queue\n" + "[2]: Average time spent waiting in a scheduler's queue while ready\n" + "[3]: Average time elapsed from WB until retire stage\n\n" + " [0] [1] [2] [3]\n"; + OS << Header; + formatted_raw_ostream FOS(OS); + unsigned Executions = Timeline.size() / getSource().size(); + unsigned IID = 0; + for (const MCInst &Inst : getSource()) { + printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions); + FOS << " " << printInstructionString(Inst) << '\n'; + FOS.flush(); + ++IID; + } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (getSource().size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "<total>" << '\n'; + FOS.flush(); + } +} + +void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, + const TimelineViewEntry &Entry, + unsigned Iteration, + unsigned SourceIndex) const { + if (Iteration == 0 && SourceIndex == 0) + OS << '\n'; + OS << '[' << Iteration << ',' << SourceIndex << ']'; + OS.PadToColumn(10); + assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!"); + unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); + for (unsigned I = 0, E = CycleDispatched; I < E; ++I) + OS << ((I % 5 == 0) ? '.' : ' '); + OS << TimelineView::DisplayChar::Dispatched; + if (CycleDispatched != Entry.CycleExecuted) { + // Zero latency instructions have the same value for CycleDispatched, + // CycleIssued and CycleExecuted. + for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) + OS << TimelineView::DisplayChar::Waiting; + if (Entry.CycleIssued == Entry.CycleExecuted) + OS << TimelineView::DisplayChar::DisplayChar::Executed; + else { + if (CycleDispatched != Entry.CycleIssued) + OS << TimelineView::DisplayChar::Executing; + for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; + ++I) + OS << TimelineView::DisplayChar::Executing; + OS << TimelineView::DisplayChar::Executed; + } + } + + for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) + OS << TimelineView::DisplayChar::RetireLag; + if (Entry.CycleExecuted < Entry.CycleRetired) + OS << TimelineView::DisplayChar::Retired; + + // Skip other columns. + for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) + OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); +} + +static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { + OS << "\n\nTimeline view:\n"; + if (Cycles >= 10) { + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << ' '; + else + OS << I % 10; + } + OS << '\n'; + } + + OS << "Index"; + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << I % 10; + else + OS << ' '; + } + OS << '\n'; +} + +void TimelineView::printTimeline(raw_ostream &OS) const { + formatted_raw_ostream FOS(OS); + printTimelineHeader(FOS, LastCycle); + FOS.flush(); + + unsigned IID = 0; + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned Iterations = Timeline.size() / Source.size(); + for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { + for (const MCInst &Inst : Source) { + const TimelineViewEntry &Entry = Timeline[IID]; + // When an instruction is retired after timeline-max-cycles, + // its CycleRetired is left at 0. However, it's possible for + // a 0 latency instruction to be retired during cycle 0 and we + // don't want to early exit in that case. The CycleExecuted + // attribute is set correctly whether or not it is greater + // than timeline-max-cycles so we can use that to ensure + // we don't early exit because of a 0 latency instruction. + if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) { + FOS << "Truncated display due to cycle limit\n"; + return; + } + + unsigned SourceIndex = IID % Source.size(); + printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex); + FOS << " " << printInstructionString(Inst) << '\n'; + FOS.flush(); + + ++IID; + } + } +} + +json::Value TimelineView::toJSON() const { + json::Array TimelineInfo; + + for (const TimelineViewEntry &TLE : Timeline) { + TimelineInfo.push_back( + json::Object({{"CycleDispatched", TLE.CycleDispatched}, + {"CycleReady", TLE.CycleReady}, + {"CycleIssued", TLE.CycleIssued}, + {"CycleExecuted", TLE.CycleExecuted}, + {"CycleRetired", TLE.CycleRetired}})); + } + return json::Object({{"TimelineInfo", std::move(TimelineInfo)}}); +} +} // namespace mca +} // namespace llvm |