path: root/contrib/libs/llvm16/tools/llvm-mca
author     vvvv <vvvv@ydb.tech>               2024-02-06 20:01:22 +0300
committer  Alexander Smirnov <alex@ydb.tech>  2024-02-09 19:18:27 +0300
commit     ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193 (patch)
tree       102765416c3866bde98a82facc7752d329ee0226 /contrib/libs/llvm16/tools/llvm-mca
parent     7494ca32d3a5aca00b7ac527b5f127989335102c (diff)
download   ydb-ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193.tar.gz
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-mca')
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp                         174
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h                           195
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp                209
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h                  205
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp                    129
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h                       69
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp           644
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h             348
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp            98
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h              87
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp          177
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h             93
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp               43
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h                 60
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp       170
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h          84
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp         200
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h           103
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp   91
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h     64
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp          178
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h             97
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp                  113
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h                     90
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp                 328
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h                   188
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp                            761
-rw-r--r--  contrib/libs/llvm16/tools/llvm-mca/ya.make                                 101
28 files changed, 5099 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp
new file mode 100644
index 0000000000..c91ed759ee
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp
@@ -0,0 +1,174 @@
+//===-------------------------- CodeRegion.cpp -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods from the CodeRegions interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegion.h"
+
+namespace llvm {
+namespace mca {
+
+bool CodeRegion::isLocInRange(SMLoc Loc) const {
+ if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer())
+ return false;
+ if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer())
+ return false;
+ return true;
+}
+
+void CodeRegions::addInstruction(const MCInst &Instruction) {
+ SMLoc Loc = Instruction.getLoc();
+ for (UniqueCodeRegion &Region : Regions)
+ if (Region->isLocInRange(Loc))
+ Region->addInstruction(Instruction);
+}
+
+AnalysisRegions::AnalysisRegions(llvm::SourceMgr &S) : CodeRegions(S) {
+ // Create a default region for the input code sequence.
+ Regions.emplace_back(std::make_unique<CodeRegion>("", SMLoc()));
+}
+
+void AnalysisRegions::beginRegion(StringRef Description, SMLoc Loc) {
+ if (ActiveRegions.empty()) {
+ // Remove the default region if there is at least one user defined region.
+ // By construction, only the default region has an invalid start location.
+ if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() &&
+ !Regions[0]->endLoc().isValid()) {
+ ActiveRegions[Description] = 0;
+ Regions[0] = std::make_unique<CodeRegion>(Description, Loc);
+ return;
+ }
+ } else {
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ const CodeRegion &R = *Regions[It->second];
+ if (Description.empty()) {
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "found multiple overlapping anonymous regions");
+ SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note,
+ "Previous anonymous region was defined here");
+ FoundErrors = true;
+ return;
+ }
+
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "overlapping regions cannot have the same name");
+ SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note,
+ "region " + Description + " was previously defined here");
+ FoundErrors = true;
+ return;
+ }
+ }
+
+ ActiveRegions[Description] = Regions.size();
+ Regions.emplace_back(std::make_unique<CodeRegion>(Description, Loc));
+}
+
+void AnalysisRegions::endRegion(StringRef Description, SMLoc Loc) {
+ if (Description.empty()) {
+ // Special case where there is only one user defined region,
+ // and this LLVM-MCA-END directive doesn't provide a region name.
+ // In this case, we assume that the user simply wanted to just terminate
+ // the only active region.
+ if (ActiveRegions.size() == 1) {
+ auto It = ActiveRegions.begin();
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
+ return;
+ }
+
+ // Special case where the region end marker applies to the default region.
+ if (ActiveRegions.empty() && Regions.size() == 1 &&
+ !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) {
+ Regions[0]->setEndLocation(Loc);
+ return;
+ }
+ }
+
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
+ return;
+ }
+
+ FoundErrors = true;
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "found an invalid region end directive");
+ if (!Description.empty()) {
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note,
+ "unable to find an active region named " + Description);
+ } else {
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note,
+ "unable to find an active anonymous region");
+ }
+}
+
+InstrumentRegions::InstrumentRegions(llvm::SourceMgr &S) : CodeRegions(S) {}
+
+void InstrumentRegions::beginRegion(StringRef Description, SMLoc Loc,
+ SharedInstrument I) {
+ if (Description.empty()) {
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "anonymous instrumentation regions are not permitted");
+ FoundErrors = true;
+ return;
+ }
+
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ const CodeRegion &R = *Regions[It->second];
+ SM.PrintMessage(
+ Loc, llvm::SourceMgr::DK_Error,
+ "overlapping instrumentation regions cannot be of the same kind");
+ SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note,
+ "instrumentation region " + Description +
+ " was previously defined here");
+ FoundErrors = true;
+ return;
+ }
+
+ ActiveRegions[Description] = Regions.size();
+ Regions.emplace_back(std::make_unique<InstrumentRegion>(Description, Loc, I));
+}
+
+void InstrumentRegions::endRegion(StringRef Description, SMLoc Loc) {
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
+ return;
+ }
+
+ FoundErrors = true;
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "found an invalid instrumentation region end directive");
+ if (!Description.empty()) {
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note,
+ "unable to find an active instrumentation region named " +
+ Description);
+ }
+}
+
+const SmallVector<SharedInstrument>
+InstrumentRegions::getActiveInstruments(SMLoc Loc) const {
+ SmallVector<SharedInstrument> AI;
+ for (auto &R : Regions) {
+ if (R->isLocInRange(Loc)) {
+ InstrumentRegion *IR = static_cast<InstrumentRegion *>(R.get());
+ AI.emplace_back(IR->getInstrument());
+ }
+ }
+ return AI;
+}
+
+} // namespace mca
+} // namespace llvm
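For readers unfamiliar with this API, the following is a minimal sketch (not part of the patch) of how AnalysisRegions behaves when driven directly: the anonymous default region is replaced by the first user-defined region, and re-opening an active region under the same name is rejected. The buffer contents and the makeLoc helper are illustrative assumptions.

#include "CodeRegion.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;
using namespace mca;

// Illustrative helper: derive an SMLoc from an offset into a buffer.
static SMLoc makeLoc(const MemoryBuffer &MB, size_t Offset) {
  return SMLoc::getFromPointer(MB.getBufferStart() + Offset);
}

void driveAnalysisRegions() {
  SourceMgr SM;
  auto Buf = MemoryBuffer::getMemBuffer("addl %eax, %ebx\n", "snippet.s");
  const MemoryBuffer &MB = *Buf;
  SM.AddNewSourceBuffer(std::move(Buf), SMLoc());

  AnalysisRegions Regions(SM);                // starts with one anonymous default region
  Regions.beginRegion("foo", makeLoc(MB, 0)); // replaces the default region
  Regions.endRegion("foo", makeLoc(MB, 15));  // instructions located in [0, 15] now belong to "foo"
  // Calling beginRegion("foo", ...) again before endRegion would be reported
  // as "overlapping regions cannot have the same name".
}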
diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h
new file mode 100644
index 0000000000..b5b2f3a0d1
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h
@@ -0,0 +1,195 @@
+//===-------------------------- CodeRegion.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the CodeRegion, CodeRegions, InstrumentRegion,
+/// AnalysisRegions, and InstrumentRegions classes.
+///
+/// A CodeRegion describes a region of assembly code guarded by special LLVM-MCA
+/// comment directives.
+///
+/// # LLVM-MCA-BEGIN foo
+/// ... ## asm
+/// # LLVM-MCA-END
+///
+/// A comment starting with substring LLVM-MCA-BEGIN marks the beginning of a
+/// new region of code.
+/// A comment starting with substring LLVM-MCA-END marks the end of the
+/// last-seen region of code.
+///
+/// Code regions are not allowed to overlap. Each region can have an optional
+/// description; internally, regions are described by a range of source
+/// locations (SMLoc objects).
+///
+/// An instruction (a MCInst) is added to a CodeRegion R only if its
+/// location is in range [R.RangeStart, R.RangeEnd].
+///
+/// An InstrumentRegion describes a region of assembly code guarded by
+/// special LLVM-MCA comment directives.
+///
+/// # LLVM-MCA-<INSTRUMENTATION_TYPE> <data>
+/// ... ## asm
+///
+/// where INSTRUMENTATION_TYPE is an instrumentation kind known to llvm-mca
+/// and <data> is the payload that kind expects to use.
+///
+/// A comment starting with substring LLVM-MCA-<INSTRUMENTATION_TYPE>
+/// brings data into scope for llvm-mca to use in its analysis for
+/// all following instructions.
+///
+/// If the same INSTRUMENTATION_TYPE is found later in the instruction list,
+/// then the original InstrumentRegion will be automatically ended,
+/// and a new InstrumentRegion will begin.
+///
+/// If there are comments containing different INSTRUMENTATION_TYPEs,
+/// then both data sets remain available. In contrast with a CodeRegion,
+/// an InstrumentRegion does not need a comment to end the region.
+//
+// An instruction (a MCInst) is added to an InstrumentRegion R only
+// if its location is in range [R.RangeStart, R.RangeEnd].
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include <vector>
+
+namespace llvm {
+namespace mca {
+
+/// A region of assembly code.
+///
+/// It identifies a sequence of machine instructions.
+class CodeRegion {
+ // An optional descriptor for this region.
+ llvm::StringRef Description;
+ // Instructions that form this region.
+ llvm::SmallVector<llvm::MCInst, 16> Instructions;
+ // Source location range.
+ llvm::SMLoc RangeStart;
+ llvm::SMLoc RangeEnd;
+
+ CodeRegion(const CodeRegion &) = delete;
+ CodeRegion &operator=(const CodeRegion &) = delete;
+
+public:
+ CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start)
+ : Description(Desc), RangeStart(Start) {}
+
+ void addInstruction(const llvm::MCInst &Instruction) {
+ Instructions.emplace_back(Instruction);
+ }
+
+ llvm::SMLoc startLoc() const { return RangeStart; }
+ llvm::SMLoc endLoc() const { return RangeEnd; }
+
+ void setEndLocation(llvm::SMLoc End) { RangeEnd = End; }
+ bool empty() const { return Instructions.empty(); }
+ bool isLocInRange(llvm::SMLoc Loc) const;
+
+ llvm::ArrayRef<llvm::MCInst> getInstructions() const { return Instructions; }
+
+ llvm::StringRef getDescription() const { return Description; }
+};
+
+/// Alias AnalysisRegion with CodeRegion since CodeRegionGenerator
+/// is abstract and AnalysisRegionGenerator operates on AnalysisRegions.
+using AnalysisRegion = CodeRegion;
+
+/// A CodeRegion that contains instrumentation that can be used
+/// in analysis of the region.
+class InstrumentRegion : public CodeRegion {
+ /// Instrument for this region.
+ SharedInstrument Instrument;
+
+public:
+ InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, SharedInstrument I)
+ : CodeRegion(Desc, Start), Instrument(I) {}
+
+public:
+ SharedInstrument getInstrument() const { return Instrument; }
+};
+
+class CodeRegionParseError final : public Error {};
+
+class CodeRegions {
+ CodeRegions(const CodeRegions &) = delete;
+ CodeRegions &operator=(const CodeRegions &) = delete;
+
+protected:
+ // A source manager. Used by the tool to generate meaningful warnings.
+ llvm::SourceMgr &SM;
+
+ using UniqueCodeRegion = std::unique_ptr<CodeRegion>;
+ std::vector<UniqueCodeRegion> Regions;
+ llvm::StringMap<unsigned> ActiveRegions;
+ bool FoundErrors;
+
+public:
+ CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {}
+
+ typedef std::vector<UniqueCodeRegion>::iterator iterator;
+ typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator;
+
+ iterator begin() { return Regions.begin(); }
+ iterator end() { return Regions.end(); }
+ const_iterator begin() const { return Regions.cbegin(); }
+ const_iterator end() const { return Regions.cend(); }
+
+ void addInstruction(const llvm::MCInst &Instruction);
+ llvm::SourceMgr &getSourceMgr() const { return SM; }
+
+ llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const {
+ return Regions[Idx]->getInstructions();
+ }
+
+ bool empty() const {
+ return llvm::all_of(Regions, [](const UniqueCodeRegion &Region) {
+ return Region->empty();
+ });
+ }
+
+ bool isValid() const { return !FoundErrors; }
+
+ bool isRegionActive(llvm::StringRef Description) const {
+ return ActiveRegions.find(Description) != ActiveRegions.end();
+ }
+};
+
+struct AnalysisRegions : public CodeRegions {
+ AnalysisRegions(llvm::SourceMgr &S);
+
+ void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+};
+
+struct InstrumentRegions : public CodeRegions {
+ InstrumentRegions(llvm::SourceMgr &S);
+
+ void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc,
+ SharedInstrument Instrument);
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+
+ const SmallVector<SharedInstrument>
+ getActiveInstruments(llvm::SMLoc Loc) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
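As a usage note, here is a sketch (under the assumption that a CodeRegionGenerator has already populated the container) of how the driver typically walks the regions and analyzes each non-empty instruction sequence on its own:

#include "CodeRegion.h"
#include <memory>

using namespace llvm;
using namespace mca;

void consumeRegions(const AnalysisRegions &Regions) {
  unsigned Idx = 0;
  for (const std::unique_ptr<CodeRegion> &Region : Regions) {
    if (!Region->empty()) {
      ArrayRef<MCInst> Insts = Regions.getInstructionSequence(Idx);
      // Each sequence would be handed to its own simulation pipeline;
      // Region->getDescription() labels the report for that region.
      (void)Insts;
    }
    ++Idx;
  }
}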
diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp
new file mode 100644
index 0000000000..b8e10fa69c
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -0,0 +1,209 @@
+//===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegionGenerator.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SMLoc.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+// This virtual dtor serves as the anchor for the CodeRegionGenerator class.
+CodeRegionGenerator::~CodeRegionGenerator() {}
+
+// This class provides the callbacks that occur when parsing input assembly.
+class MCStreamerWrapper final : public MCStreamer {
+ CodeRegions &Regions;
+
+public:
+ MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
+ : MCStreamer(Context), Regions(R) {}
+
+ // We only want to intercept the emission of new instructions.
+ void emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo & /* unused */) override {
+ Regions.addInstruction(Inst);
+ }
+
+ bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
+ return true;
+ }
+
+ void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ Align ByteAlignment) override {}
+ void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, Align ByteAlignment = Align(1),
+ SMLoc Loc = SMLoc()) override {}
+ void emitGPRel32Value(const MCExpr *Value) override {}
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void emitCOFFSymbolStorageClass(int StorageClass) override {}
+ void emitCOFFSymbolType(int Type) override {}
+ void endCOFFSymbolDef() override {}
+
+ ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
+ return Regions.getInstructionSequence(Index);
+ }
+};
+
+Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions(
+ const std::unique_ptr<MCInstPrinter> &IP) {
+ MCTargetOptions Opts;
+ Opts.PreserveAsmComments = false;
+ CodeRegions &Regions = getRegions();
+ MCStreamerWrapper Str(Ctx, Regions);
+
+ // Need to initialize an MCTargetStreamer otherwise
+ // certain asm directives will cause a segfault.
+ // Using nulls() so that anything emitted by the MCTargetStreamer
+ // doesn't show up in the llvm-mca output.
+ raw_ostream &OSRef = nulls();
+ formatted_raw_ostream FOSRef(OSRef);
+ TheTarget.createAsmTargetStreamer(Str, FOSRef, IP.get(),
+ /*IsVerboseAsm=*/true);
+
+ // Create a MCAsmParser and setup the lexer to recognize llvm-mca ASM
+ // comments.
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(Regions.getSourceMgr(), Ctx, Str, MAI));
+ MCAsmLexer &Lexer = Parser->getLexer();
+ MCACommentConsumer *CCP = getCommentConsumer();
+ Lexer.setCommentConsumer(CCP);
+ // Enable support for MASM literal numbers (example: 05h, 101b).
+ Lexer.setLexMasmIntegers(true);
+
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
+ if (!TAP)
+ return make_error<StringError>(
+ "This target does not support assembly parsing.",
+ inconvertibleErrorCode());
+ Parser->setTargetParser(*TAP);
+ Parser->Run(false);
+
+ if (CCP->hadErr())
+ return make_error<StringError>("There was an error parsing comments.",
+ inconvertibleErrorCode());
+
+ // Set the assembler dialect from the input. llvm-mca will use this as the
+ // default dialect when printing reports.
+ AssemblerDialect = Parser->getAssemblerDialect();
+ return Regions;
+}
+
+void AnalysisRegionCommentConsumer::HandleComment(SMLoc Loc,
+ StringRef CommentText) {
+ // Skip empty comments.
+ StringRef Comment(CommentText);
+ if (Comment.empty())
+ return;
+
+ // Skip spaces and tabs.
+ unsigned Position = Comment.find_first_not_of(" \t");
+ if (Position >= Comment.size())
+ // We reached the end of the comment. Bail out.
+ return;
+
+ Comment = Comment.drop_front(Position);
+ if (Comment.consume_front("LLVM-MCA-END")) {
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ Regions.endRegion(Comment, Loc);
+ return;
+ }
+
+ // Try to parse the LLVM-MCA-BEGIN comment.
+ if (!Comment.consume_front("LLVM-MCA-BEGIN"))
+ return;
+
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ // Use the rest of the string as a descriptor for this code snippet.
+ Regions.beginRegion(Comment, Loc);
+}
+
+void InstrumentRegionCommentConsumer::HandleComment(SMLoc Loc,
+ StringRef CommentText) {
+ // Skip empty comments.
+ StringRef Comment(CommentText);
+ if (Comment.empty())
+ return;
+
+ // Skip spaces and tabs.
+ unsigned Position = Comment.find_first_not_of(" \t");
+ if (Position >= Comment.size())
+ // We reached the end of the comment. Bail out.
+ return;
+ Comment = Comment.drop_front(Position);
+
+ // Bail out if not an MCA style comment
+ if (!Comment.consume_front("LLVM-MCA-"))
+ return;
+
+ // Skip AnalysisRegion comments
+ if (Comment.consume_front("BEGIN") || Comment.consume_front("END"))
+ return;
+
+ if (IM.shouldIgnoreInstruments())
+ return;
+
+ auto [InstrumentKind, Data] = Comment.split(" ");
+
+  // It is an error if the comment is not of the form LLVM-MCA-TARGET-KIND.
+ if (!IM.supportsInstrumentType(InstrumentKind)) {
+ if (InstrumentKind.empty())
+ SM.PrintMessage(
+ Loc, llvm::SourceMgr::DK_Error,
+ "No instrumentation kind was provided in LLVM-MCA comment");
+ else
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "Unknown instrumentation type in LLVM-MCA comment: " +
+ InstrumentKind);
+ FoundError = true;
+ return;
+ }
+
+ SharedInstrument I = IM.createInstrument(InstrumentKind, Data);
+ if (!I) {
+ if (Data.empty())
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "Failed to create " + InstrumentKind +
+ " instrument with no data");
+ else
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
+ "Failed to create " + InstrumentKind +
+ " instrument with data: " + Data);
+ FoundError = true;
+ return;
+ }
+
+ // End InstrumentType region if one is open
+ if (Regions.isRegionActive(InstrumentKind))
+ Regions.endRegion(InstrumentKind, Loc);
+ // Start new instrumentation region
+ Regions.beginRegion(InstrumentKind, Loc, I);
+}
+
+} // namespace mca
+} // namespace llvm
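To make the comment-handling contract concrete, here is a hypothetical sketch that feeds comments to AnalysisRegionCommentConsumer directly; in the real tool the assembly lexer invokes HandleComment, and the leading '#' of each assembly comment is assumed to have been stripped before the callback runs:

#include "CodeRegionGenerator.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;
using namespace mca;

void feedComments(SourceMgr &SM, SMLoc BeginLoc, SMLoc EndLoc) {
  AnalysisRegions Regions(SM);
  AnalysisRegionCommentConsumer CC(Regions);

  CC.HandleComment(BeginLoc, "  LLVM-MCA-BEGIN dot-product"); // opens region "dot-product"
  CC.HandleComment(BeginLoc, " an ordinary comment");         // ignored: no LLVM-MCA prefix
  CC.HandleComment(EndLoc, "  LLVM-MCA-END");                 // one active region: name optional
}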
diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h
new file mode 100644
index 0000000000..88621ed856
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h
@@ -0,0 +1,205 @@
+//===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+#define LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+
+#include "CodeRegion.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+class MCACommentConsumer : public AsmCommentConsumer {
+protected:
+ bool FoundError;
+
+public:
+ MCACommentConsumer() : FoundError(false) {}
+
+ bool hadErr() const { return FoundError; }
+};
+
+/// A comment consumer that parses strings. The only valid tokens are strings.
+class AnalysisRegionCommentConsumer : public MCACommentConsumer {
+ AnalysisRegions &Regions;
+
+public:
+ AnalysisRegionCommentConsumer(AnalysisRegions &R) : Regions(R) {}
+
+ /// Parses a comment. It begins a new region if it is of the form
+ /// LLVM-MCA-BEGIN. It ends a region if it is of the form LLVM-MCA-END.
+ /// Regions can be optionally named if they are of the form
+ /// LLVM-MCA-BEGIN <name> or LLVM-MCA-END <name>. Subregions are
+ /// permitted, but a region that begins while another region is active
+  /// must be ended before the outer region is ended. If there is only one
+ /// active region, LLVM-MCA-END does not need to provide a name.
+ void HandleComment(SMLoc Loc, StringRef CommentText) override;
+};
+
+/// A comment consumer that parses strings to create InstrumentRegions.
+/// The only valid tokens are strings.
+class InstrumentRegionCommentConsumer : public MCACommentConsumer {
+ llvm::SourceMgr &SM;
+
+ InstrumentRegions &Regions;
+
+ InstrumentManager &IM;
+
+public:
+ InstrumentRegionCommentConsumer(llvm::SourceMgr &SM, InstrumentRegions &R,
+ InstrumentManager &IM)
+ : SM(SM), Regions(R), IM(IM) {}
+
+ /// Parses a comment. It begins a new region if it is of the form
+ /// LLVM-MCA-<INSTRUMENTATION_TYPE> <data> where INSTRUMENTATION_TYPE
+ /// is a valid InstrumentKind. If there is already an active
+  /// region of type INSTRUMENTATION_TYPE, then it will end the active
+ /// one and begin a new one using the new data.
+ void HandleComment(SMLoc Loc, StringRef CommentText) override;
+};
+
+/// This abstract class is responsible for parsing the input given to
+/// the llvm-mca driver, and converting that into a CodeRegions instance.
+class CodeRegionGenerator {
+protected:
+ CodeRegionGenerator(const CodeRegionGenerator &) = delete;
+ CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete;
+ virtual Expected<const CodeRegions &>
+ parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0;
+
+public:
+ CodeRegionGenerator() {}
+ virtual ~CodeRegionGenerator();
+};
+
+/// Abstract CodeRegionGenerator with AnalysisRegions member
+class AnalysisRegionGenerator : public virtual CodeRegionGenerator {
+protected:
+ AnalysisRegions Regions;
+
+public:
+ AnalysisRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {}
+
+ virtual Expected<const AnalysisRegions &>
+ parseAnalysisRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0;
+};
+
+/// Abstract CodeRegionGenerator with InstrumentRegions member
+class InstrumentRegionGenerator : public virtual CodeRegionGenerator {
+protected:
+ InstrumentRegions Regions;
+
+public:
+ InstrumentRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {}
+
+ virtual Expected<const InstrumentRegions &>
+ parseInstrumentRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0;
+};
+
+/// This abstract class is responsible for parsing input ASM and
+/// generating a CodeRegions instance.
+class AsmCodeRegionGenerator : public virtual CodeRegionGenerator {
+ const Target &TheTarget;
+ MCContext &Ctx;
+ const MCAsmInfo &MAI;
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ unsigned AssemblerDialect; // This is set during parsing.
+
+public:
+ AsmCodeRegionGenerator(const Target &T, MCContext &C, const MCAsmInfo &A,
+ const MCSubtargetInfo &S, const MCInstrInfo &I)
+ : TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I), AssemblerDialect(0) {}
+
+ virtual MCACommentConsumer *getCommentConsumer() = 0;
+ virtual CodeRegions &getRegions() = 0;
+
+ unsigned getAssemblerDialect() const { return AssemblerDialect; }
+ Expected<const CodeRegions &>
+ parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override;
+};
+
+class AsmAnalysisRegionGenerator final : public AnalysisRegionGenerator,
+ public AsmCodeRegionGenerator {
+ AnalysisRegionCommentConsumer CC;
+
+public:
+ AsmAnalysisRegionGenerator(const Target &T, llvm::SourceMgr &SM, MCContext &C,
+ const MCAsmInfo &A, const MCSubtargetInfo &S,
+ const MCInstrInfo &I)
+ : AnalysisRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I),
+ CC(Regions) {}
+
+ MCACommentConsumer *getCommentConsumer() override { return &CC; };
+ CodeRegions &getRegions() override { return Regions; };
+
+ Expected<const AnalysisRegions &>
+ parseAnalysisRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
+ Expected<const CodeRegions &> RegionsOrErr = parseCodeRegions(IP);
+ if (!RegionsOrErr)
+ return RegionsOrErr.takeError();
+ else
+ return static_cast<const AnalysisRegions &>(*RegionsOrErr);
+ }
+
+ Expected<const CodeRegions &>
+ parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
+ return AsmCodeRegionGenerator::parseCodeRegions(IP);
+ }
+};
+
+class AsmInstrumentRegionGenerator final : public InstrumentRegionGenerator,
+ public AsmCodeRegionGenerator {
+ InstrumentRegionCommentConsumer CC;
+
+public:
+ AsmInstrumentRegionGenerator(const Target &T, llvm::SourceMgr &SM,
+ MCContext &C, const MCAsmInfo &A,
+ const MCSubtargetInfo &S, const MCInstrInfo &I,
+ InstrumentManager &IM)
+ : InstrumentRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I),
+ CC(SM, Regions, IM) {}
+
+ MCACommentConsumer *getCommentConsumer() override { return &CC; };
+ CodeRegions &getRegions() override { return Regions; };
+
+ Expected<const InstrumentRegions &>
+ parseInstrumentRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
+ Expected<const CodeRegions &> RegionsOrErr = parseCodeRegions(IP);
+ if (!RegionsOrErr)
+ return RegionsOrErr.takeError();
+ else
+ return static_cast<const InstrumentRegions &>(*RegionsOrErr);
+ }
+
+ Expected<const CodeRegions &>
+ parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
+ return AsmCodeRegionGenerator::parseCodeRegions(IP);
+ }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
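A sketch of how the concrete generators above are used; the MC objects passed in are assumed to have been created through the usual TargetRegistry/MCContext setup performed by llvm-mca.cpp:

#include "CodeRegionGenerator.h"

using namespace llvm;
using namespace mca;

Error collectAnalysisRegions(const Target &T, SourceMgr &SM, MCContext &Ctx,
                             const MCAsmInfo &MAI, const MCSubtargetInfo &STI,
                             const MCInstrInfo &MCII,
                             const std::unique_ptr<MCInstPrinter> &IP) {
  AsmAnalysisRegionGenerator Gen(T, SM, Ctx, MAI, STI, MCII);
  Expected<const AnalysisRegions &> RegionsOrErr = Gen.parseAnalysisRegions(IP);
  if (!RegionsOrErr)
    return RegionsOrErr.takeError();
  // RegionsOrErr->begin()/end() now enumerate the regions parsed from the input.
  return Error::success();
}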
diff --git a/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp
new file mode 100644
index 0000000000..9d06c6a193
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp
@@ -0,0 +1,129 @@
+//===--------------------- PipelinePrinter.cpp ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the PipelinePrinter interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "PipelinePrinter.h"
+#include "CodeRegion.h"
+#include "Views/InstructionView.h"
+
+namespace llvm {
+namespace mca {
+
+void PipelinePrinter::printRegionHeader(llvm::raw_ostream &OS) const {
+ StringRef RegionName;
+ if (!Region.getDescription().empty())
+ RegionName = Region.getDescription();
+
+ OS << "\n[" << RegionIdx << "] Code Region";
+ if (!RegionName.empty())
+ OS << " - " << RegionName;
+ OS << "\n\n";
+}
+
+json::Object PipelinePrinter::getJSONReportRegion() const {
+ json::Object JO;
+
+ StringRef RegionName = "";
+ if (!Region.getDescription().empty())
+ RegionName = Region.getDescription();
+
+ JO.try_emplace("Name", RegionName);
+ for (const auto &V : Views)
+ if (V->isSerializable())
+ JO.try_emplace(V->getNameAsString().str(), V->toJSON());
+
+ return JO;
+}
+
+json::Object PipelinePrinter::getJSONSimulationParameters() const {
+ json::Object SimParameters({{"-mcpu", STI.getCPU()},
+ {"-mtriple", STI.getTargetTriple().getTriple()},
+ {"-march", STI.getTargetTriple().getArchName()}});
+
+ const MCSchedModel &SM = STI.getSchedModel();
+ if (!SM.isOutOfOrder())
+ return SimParameters;
+
+ if (PO.RegisterFileSize)
+ SimParameters.try_emplace("-register-file-size", PO.RegisterFileSize);
+
+ if (!PO.AssumeNoAlias)
+ SimParameters.try_emplace("-noalias", PO.AssumeNoAlias);
+
+ if (PO.DecodersThroughput)
+ SimParameters.try_emplace("-decoder-throughput", PO.DecodersThroughput);
+
+ if (PO.MicroOpQueueSize)
+ SimParameters.try_emplace("-micro-op-queue-size", PO.MicroOpQueueSize);
+
+ if (PO.DispatchWidth)
+ SimParameters.try_emplace("-dispatch", PO.DispatchWidth);
+
+ if (PO.LoadQueueSize)
+ SimParameters.try_emplace("-lqueue", PO.LoadQueueSize);
+
+ if (PO.StoreQueueSize)
+ SimParameters.try_emplace("-squeue", PO.StoreQueueSize);
+
+ return SimParameters;
+}
+
+json::Object PipelinePrinter::getJSONTargetInfo() const {
+ json::Array Resources;
+ const MCSchedModel &SM = STI.getSchedModel();
+ StringRef MCPU = STI.getCPU();
+
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits)
+ continue;
+
+ for (unsigned J = 0; J < NumUnits; ++J) {
+ std::string ResourceName = ProcResource.Name;
+ if (NumUnits > 1) {
+ ResourceName += ".";
+ ResourceName += J;
+ }
+
+ Resources.push_back(ResourceName);
+ }
+ }
+
+ return json::Object({{"CPUName", MCPU}, {"Resources", std::move(Resources)}});
+}
+
+void PipelinePrinter::printReport(json::Object &JO) const {
+ if (!RegionIdx) {
+ JO.try_emplace("TargetInfo", getJSONTargetInfo());
+ JO.try_emplace("SimulationParameters", getJSONSimulationParameters());
+ // Construct an array of regions.
+ JO.try_emplace("CodeRegions", json::Array());
+ }
+
+ json::Array *Regions = JO.getArray("CodeRegions");
+ assert(Regions && "This array must exist!");
+ Regions->push_back(getJSONReportRegion());
+}
+
+void PipelinePrinter::printReport(llvm::raw_ostream &OS) const {
+ // Don't print the header of this region if it is the default region, and if
+ // it doesn't have an end location.
+ if (Region.startLoc().isValid() || Region.endLoc().isValid())
+ printRegionHeader(OS);
+
+ for (const auto &V : Views)
+ V->printView(OS);
+}
+
+} // namespace mca
+} // namespace llvm
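For context, a hypothetical driver loop for the JSON path above (the real loop lives in llvm-mca.cpp): the printer for region 0 contributes TargetInfo and SimulationParameters once, every printer appends its region to the "CodeRegions" array, and the document is serialized at the end. Printers are assumed to be ordered by region index starting at 0.

#include "PipelinePrinter.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"

using namespace llvm;
using namespace mca;

void emitJSONReport(ArrayRef<const PipelinePrinter *> Printers, raw_ostream &OS) {
  json::Object JO;
  for (const PipelinePrinter *Printer : Printers)
    Printer->printReport(JO); // region 0 also emits target info and parameters
  OS << formatv("{0:2}", json::Value(std::move(JO))) << '\n';
}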
diff --git a/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h
new file mode 100644
index 0000000000..d89e913f97
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h
@@ -0,0 +1,69 @@
+//===--------------------- PipelinePrinter.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements class PipelinePrinter.
+///
+/// PipelinePrinter allows the customization of the performance report.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
+#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/View.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+class CodeRegion;
+
+/// A printer class that knows how to collect statistics on the
+/// code analyzed by the llvm-mca tool.
+///
+/// This class knows how to print out the analysis information collected
+/// during the execution of the code. Internally, it delegates to other
+/// classes the task of printing out timeline information as well as
+/// resource pressure.
+class PipelinePrinter {
+ Pipeline &P;
+ const CodeRegion &Region;
+ unsigned RegionIdx;
+ const MCSubtargetInfo &STI;
+ const PipelineOptions &PO;
+ llvm::SmallVector<std::unique_ptr<View>, 8> Views;
+
+ void printRegionHeader(llvm::raw_ostream &OS) const;
+ json::Object getJSONReportRegion() const;
+ json::Object getJSONTargetInfo() const;
+ json::Object getJSONSimulationParameters() const;
+
+public:
+ PipelinePrinter(Pipeline &Pipe, const CodeRegion &R, unsigned Idx,
+ const MCSubtargetInfo &STI, const PipelineOptions &PO)
+ : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO) {}
+
+ void addView(std::unique_ptr<View> V) {
+ P.addEventListener(V.get());
+ Views.emplace_back(std::move(V));
+ }
+
+ void printReport(llvm::raw_ostream &OS) const;
+ void printReport(json::Object &JO) const;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
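A minimal usage sketch for PipelinePrinter, assuming an already-configured mca::Pipeline, a parsed CodeRegion, and an MCInstPrinter created the way llvm-mca.cpp does. Views are registered before the pipeline runs so they can observe its events; the report is printed afterwards.

#include "CodeRegion.h"
#include "PipelinePrinter.h"
#include "Views/BottleneckAnalysis.h"
#include <memory>

using namespace llvm;
using namespace mca;

Error printRegionReport(Pipeline &P, const CodeRegion &Region, unsigned RegionIdx,
                        const MCSubtargetInfo &STI, const PipelineOptions &PO,
                        MCInstPrinter &MCIP, unsigned Iterations,
                        raw_ostream &OS) {
  PipelinePrinter Printer(P, Region, RegionIdx, STI, PO);
  Printer.addView(std::make_unique<BottleneckAnalysis>(
      STI, MCIP, Region.getInstructions(), Iterations));

  Expected<unsigned> Cycles = P.run(); // simulate; registered views observe the events
  if (!Cycles)
    return Cycles.takeError();
  Printer.printReport(OS);             // region header (if named) plus each view
  return Error::success();
}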
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp
new file mode 100644
index 0000000000..dc0a07e75e
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -0,0 +1,644 @@
+//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the BottleneckAnalysis
+/// to report bottleneck info.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/BottleneckAnalysis.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+ : SM(Model),
+ ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
+ ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
+ ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+ ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ // Ignore the invalid resource at index zero.
+ unsigned NextResourceUsersIdx = 0;
+ for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+ NextResourceUsersIdx += ProcResource.NumUnits;
+ uint64_t ResourceMask = ProcResID2Mask[I];
+ ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+ }
+
+ ResourceUsers.resize(NextResourceUsersIdx);
+ std::fill(ResourceUsers.begin(), ResourceUsers.end(),
+ std::make_pair<unsigned, unsigned>(~0U, 0U));
+}
+
+void PressureTracker::getResourceUsers(uint64_t ResourceMask,
+ SmallVectorImpl<User> &Users) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+ const User U = getResourceUser(ProcResID, I);
+ if (U.second && IPI.find(U.first) != IPI.end())
+ Users.emplace_back(U);
+ }
+}
+
+void PressureTracker::onInstructionDispatched(unsigned IID) {
+ IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
+}
+
+void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
+
+void PressureTracker::handleInstructionIssuedEvent(
+ const HWInstructionIssuedEvent &Event) {
+ unsigned IID = Event.IR.getSourceIndex();
+ for (const ResourceUse &Use : Event.UsedResources) {
+ const ResourceRef &RR = Use.first;
+ unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
+ Index += countTrailingZeros(RR.second);
+ ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
+ }
+}
+
+void PressureTracker::updateResourcePressureDistribution(
+ uint64_t CumulativeMask) {
+ while (CumulativeMask) {
+ uint64_t Current = CumulativeMask & (-CumulativeMask);
+ unsigned ResIdx = getResourceStateIndex(Current);
+ unsigned ProcResID = ResIdx2ProcResID[ResIdx];
+ uint64_t Mask = ProcResID2Mask[ProcResID];
+
+ if (Mask == Current) {
+ ResourcePressureDistribution[ProcResID]++;
+ CumulativeMask ^= Current;
+ continue;
+ }
+
+ Mask ^= Current;
+ while (Mask) {
+ uint64_t SubUnit = Mask & (-Mask);
+ ResIdx = getResourceStateIndex(SubUnit);
+ ProcResID = ResIdx2ProcResID[ResIdx];
+ ResourcePressureDistribution[ProcResID]++;
+ Mask ^= SubUnit;
+ }
+
+ CumulativeMask ^= Current;
+ }
+}
+
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+ assert(Event.Reason != HWPressureEvent::INVALID &&
+ "Unexpected invalid event!");
+
+ switch (Event.Reason) {
+ default:
+ break;
+
+ case HWPressureEvent::RESOURCES: {
+ const uint64_t ResourceMask = Event.ResourceMask;
+ updateResourcePressureDistribution(Event.ResourceMask);
+
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ const Instruction &IS = *IR.getInstruction();
+ unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
+ if (!BusyResources)
+ continue;
+
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].ResourcePressureCycles++;
+ }
+ break;
+ }
+
+ case HWPressureEvent::REGISTER_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].RegisterPressureCycles++;
+ }
+ break;
+
+ case HWPressureEvent::MEMORY_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].MemoryPressureCycles++;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
+ const DependencyEdge &DepEdge,
+ MCInstPrinter &MCIP) const {
+ unsigned FromIID = DepEdge.FromIID;
+ unsigned ToIID = DepEdge.ToIID;
+ assert(FromIID < ToIID && "Graph should be acyclic!");
+
+ const DependencyEdge::Dependency &DE = DepEdge.Dep;
+ assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
+
+ OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
+ if (DE.Type == DependencyEdge::DT_REGISTER) {
+ OS << " - REGISTER: ";
+ MCIP.printRegName(OS, DE.ResourceOrRegID);
+ } else if (DE.Type == DependencyEdge::DT_MEMORY) {
+ OS << " - MEMORY";
+ } else {
+ assert(DE.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+ }
+ OS << " - COST: " << DE.Cost << '\n';
+}
+#endif // NDEBUG
+
+void DependencyGraph::pruneEdges(unsigned Iterations) {
+ for (DGNode &N : Nodes) {
+ unsigned NumPruned = 0;
+ const unsigned Size = N.OutgoingEdges.size();
+ // Use a cut-off threshold to prune edges with a low frequency.
+ for (unsigned I = 0, E = Size; I < E; ++I) {
+ DependencyEdge &Edge = N.OutgoingEdges[I];
+ if (Edge.Frequency == Iterations)
+ continue;
+ double Factor = (double)Edge.Frequency / Iterations;
+ if (0.10 < Factor)
+ continue;
+ Nodes[Edge.ToIID].NumPredecessors--;
+ std::swap(Edge, N.OutgoingEdges[E - 1]);
+ --E;
+ ++NumPruned;
+ }
+
+ if (NumPruned)
+ N.OutgoingEdges.resize(Size - NumPruned);
+ }
+}
+
+void DependencyGraph::initializeRootSet(
+ SmallVectorImpl<unsigned> &RootSet) const {
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &N = Nodes[I];
+ if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
+ RootSet.emplace_back(I);
+ }
+}
+
+void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
+ unsigned Iterations) {
+ SmallVector<unsigned, 8> ToVisit;
+
+ // A critical sequence is computed as the longest path from a node of the
+ // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
+ // composed of nodes with at least one successor, and no predecessors.
+ //
+ // Each node of the graph starts with an initial default cost of zero. The
+  // cost of a node is a measure of criticality: the higher the cost, the
+  // bigger the performance impact.
+ // For register and memory dependencies, the cost is a function of the write
+ // latency as well as the actual delay (in cycles) caused to users.
+ // For processor resource dependencies, the cost is a function of the resource
+ // pressure. Resource interferences with low frequency values are ignored.
+ //
+ // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
+ // the inner loop selects (i.e. visits) a node N from a set of `unvisited
+ // nodes`, and then propagates the cost of N to all its neighbors.
+ //
+ // The `unvisited nodes` set initially contains all the nodes from the
+ // RootSet. A node N is added to the `unvisited nodes` if all its
+ // predecessors have been visited already.
+ //
+  // For simplicity, every node counts its already-visited predecessors in
+  // field `NumVisitedPredecessors`. When that count reaches the node's total
+  // number of predecessors, the node is added to a `ToVisit` set.
+ //
+ // At the end of every iteration of the outer loop, set `ToVisit` becomes our
+ // new `unvisited nodes` set.
+ //
+ // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
+ // is empty. This algorithm works under the assumption that the graph is
+ // acyclic.
+ do {
+ for (unsigned IID : RootSet) {
+ const DGNode &N = Nodes[IID];
+ for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
+ unsigned ToIID = DepEdge.ToIID;
+ DGNode &To = Nodes[ToIID];
+ uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
+ // Check if this is the most expensive incoming edge seen so far. In
+ // case, update the total cost of the destination node (ToIID), as well
+ // its field `CriticalPredecessor`.
+ if (Cost > To.Cost) {
+ To.CriticalPredecessor = DepEdge;
+ To.Cost = Cost;
+ To.Depth = N.Depth + 1;
+ }
+ To.NumVisitedPredecessors++;
+ if (To.NumVisitedPredecessors == To.NumPredecessors)
+ ToVisit.emplace_back(ToIID);
+ }
+ }
+
+ std::swap(RootSet, ToVisit);
+ ToVisit.clear();
+ } while (!RootSet.empty());
+}
+
+void DependencyGraph::getCriticalSequence(
+ SmallVectorImpl<const DependencyEdge *> &Seq) const {
+ // At this stage, nodes of the graph have been already visited, and costs have
+ // been propagated through the edges (see method `propagateThroughEdges()`).
+
+ // Identify the node N with the highest cost in the graph. By construction,
+ // that node is the last instruction of our critical sequence.
+ // Field N.Depth would tell us the total length of the sequence.
+ //
+ // To obtain the sequence of critical edges, we simply follow the chain of
+ // critical predecessors starting from node N (field
+ // DGNode::CriticalPredecessor).
+ const auto It = std::max_element(
+ Nodes.begin(), Nodes.end(),
+ [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
+ unsigned IID = std::distance(Nodes.begin(), It);
+ Seq.resize(Nodes[IID].Depth);
+ for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
+ const DGNode &N = Nodes[IID];
+ DE = &N.CriticalPredecessor;
+ IID = N.CriticalPredecessor.FromIID;
+ }
+}
+
+void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
+ const MCInst &MCI,
+ bool UseDifferentColor) const {
+ FOS.PadToColumn(14);
+ if (UseDifferentColor)
+ FOS.changeColor(raw_ostream::CYAN, true, false);
+ FOS << printInstructionString(MCI);
+ if (UseDifferentColor)
+ FOS.resetColor();
+}
+
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ // Early exit if no bottlenecks were found during the simulation.
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+ return;
+
+ SmallVector<const DependencyEdge *, 16> Seq;
+ DG.getCriticalSequence(Seq);
+ if (Seq.empty())
+ return;
+
+ OS << "\nCritical sequence based on the simulation:\n\n";
+
+ const DependencyEdge &FirstEdge = *Seq[0];
+ ArrayRef<llvm::MCInst> Source = getSource();
+ unsigned FromIID = FirstEdge.FromIID % Source.size();
+ unsigned ToIID = FirstEdge.ToIID % Source.size();
+ bool IsLoopCarried = FromIID >= ToIID;
+
+ formatted_raw_ostream FOS(OS);
+ FOS.PadToColumn(14);
+ FOS << "Instruction";
+ FOS.PadToColumn(58);
+ FOS << "Dependency Information";
+
+ bool HasColors = FOS.has_colors();
+
+ unsigned CurrentIID = 0;
+ if (IsLoopCarried) {
+ FOS << "\n +----< " << FromIID << ".";
+ printInstruction(FOS, Source[FromIID], HasColors);
+ FOS << "\n |\n | < loop carried > \n |";
+ } else {
+ while (CurrentIID < FromIID) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << "\n +----< " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID], HasColors);
+ CurrentIID++;
+ }
+
+ for (const DependencyEdge *&DE : Seq) {
+ ToIID = DE->ToIID % Source.size();
+ unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+ while (CurrentIID < LastIID) {
+ FOS << "\n | " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ if (CurrentIID == ToIID) {
+ FOS << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, Source[CurrentIID], HasColors);
+ } else {
+ FOS << "\n |\n | < loop carried > \n |"
+ << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, Source[ToIID], HasColors);
+ }
+ FOS.PadToColumn(58);
+
+ const DependencyEdge::Dependency &Dep = DE->Dep;
+ if (HasColors)
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ if (Dep.Type == DependencyEdge::DT_REGISTER) {
+ FOS << "## REGISTER dependency: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
+ } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+ FOS << "## MEMORY dependency.";
+ } else {
+ assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ FOS << "## RESOURCE interference: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+ if (HasColors) {
+ FOS.resetColor();
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+ }
+ FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+ << "% ]";
+ }
+ if (HasColors)
+ FOS.resetColor();
+ ++CurrentIID;
+ }
+
+ while (CurrentIID < Source.size()) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << '\n';
+ FOS.flush();
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
+ OS << "\nREG DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
+ dumpDependencyEdge(OS, DE, MCIP);
+
+ OS << "\nMEM DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
+ dumpDependencyEdge(OS, DE, MCIP);
+
+ OS << "\nRESOURCE DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
+ dumpDependencyEdge(OS, DE, MCIP);
+}
+#endif // NDEBUG
+
+void DependencyGraph::addDependency(unsigned From, unsigned To,
+ DependencyEdge::Dependency &&Dep) {
+ DGNode &NodeFrom = Nodes[From];
+ DGNode &NodeTo = Nodes[To];
+ SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
+
+ auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
+ return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
+ });
+
+ if (It != Vec.end()) {
+ It->Dep.Cost += Dep.Cost;
+ It->Frequency++;
+ return;
+ }
+
+ DependencyEdge DE = {Dep, From, To, 1};
+ Vec.emplace_back(DE);
+ NodeTo.NumPredecessors++;
+}
+
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S, unsigned NumIter)
+ : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
+ DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
+ PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfRegisterDependencies(false),
+ PressureIncreasedBecauseOfMemoryDependencies(false),
+ SeenStallCycles(false), BPI() {}
+
+void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
+ unsigned RegID, unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = getSource().size();
+ if (IsLoopCarried) {
+ DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
+ DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
+ return;
+ }
+ DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
+}
+
+void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
+ unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = getSource().size();
+ if (IsLoopCarried) {
+ DG.addMemoryDep(From, To + SourceSize, Cost);
+ DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
+ return;
+ }
+ DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
+}
+
+void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
+ uint64_t Mask, unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = getSource().size();
+ if (IsLoopCarried) {
+ DG.addResourceDep(From, To + SourceSize, Mask, Cost);
+ DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
+ return;
+ }
+ DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
+}
+
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+ const unsigned IID = Event.IR.getSourceIndex();
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ Tracker.onInstructionDispatched(IID);
+ return;
+ }
+ if (Event.Type == HWInstructionEvent::Executed) {
+ Tracker.onInstructionExecuted(IID);
+ return;
+ }
+
+ if (Event.Type != HWInstructionEvent::Issued)
+ return;
+
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const Instruction &IS = *Event.IR.getInstruction();
+ unsigned To = IID % Source.size();
+
+ unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
+ uint64_t ResourceMask = IS.getCriticalResourceMask();
+ SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+ while (ResourceMask) {
+ uint64_t Current = ResourceMask & (-ResourceMask);
+ Tracker.getResourceUsers(Current, Users);
+ for (const std::pair<unsigned, unsigned> &U : Users)
+ addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
+ Users.clear();
+ ResourceMask ^= Current;
+ }
+
+ const CriticalDependency &RegDep = IS.getCriticalRegDep();
+ if (RegDep.Cycles) {
+ Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
+ unsigned From = RegDep.IID % Source.size();
+ addRegisterDep(From, To, RegDep.RegID, Cycles);
+ }
+
+ const CriticalDependency &MemDep = IS.getCriticalMemDep();
+ if (MemDep.Cycles) {
+ Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
+ unsigned From = MemDep.IID % Source.size();
+ addMemoryDep(From, To, Cycles);
+ }
+
+ Tracker.handleInstructionIssuedEvent(
+ static_cast<const HWInstructionIssuedEvent &>(Event));
+
+ // Check if this is the last simulated instruction.
+ if (IID == ((Iterations * Source.size()) - 1))
+ DG.finalizeGraph(Iterations);
+}
+
+void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
+ assert(Event.Reason != HWPressureEvent::INVALID &&
+ "Unexpected invalid event!");
+
+ Tracker.handlePressureEvent(Event);
+
+ switch (Event.Reason) {
+ default:
+ break;
+
+ case HWPressureEvent::RESOURCES:
+ PressureIncreasedBecauseOfResources = true;
+ break;
+ case HWPressureEvent::REGISTER_DEPS:
+ PressureIncreasedBecauseOfRegisterDependencies = true;
+ break;
+ case HWPressureEvent::MEMORY_DEPS:
+ PressureIncreasedBecauseOfMemoryDependencies = true;
+ break;
+ }
+}
+
+void BottleneckAnalysis::onCycleEnd() {
+ ++TotalCycles;
+
+ bool PressureIncreasedBecauseOfDataDependencies =
+ PressureIncreasedBecauseOfRegisterDependencies ||
+ PressureIncreasedBecauseOfMemoryDependencies;
+ if (!PressureIncreasedBecauseOfResources &&
+ !PressureIncreasedBecauseOfDataDependencies)
+ return;
+
+ ++BPI.PressureIncreaseCycles;
+ if (PressureIncreasedBecauseOfRegisterDependencies)
+ ++BPI.RegisterDependencyCycles;
+ if (PressureIncreasedBecauseOfMemoryDependencies)
+ ++BPI.MemoryDependencyCycles;
+ if (PressureIncreasedBecauseOfDataDependencies)
+ ++BPI.DataDependencyCycles;
+ if (PressureIncreasedBecauseOfResources)
+ ++BPI.ResourcePressureCycles;
+ PressureIncreasedBecauseOfResources = false;
+ PressureIncreasedBecauseOfRegisterDependencies = false;
+ PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
+void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+ OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
+ return;
+ }
+
+ double PressurePerCycle =
+ (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
+ double ResourcePressurePerCycle =
+ (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
+ double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
+ double RegDepPressurePerCycle =
+ (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
+ double MemDepPressurePerCycle =
+ (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
+
+ OS << "\n\nCycles with backend pressure increase [ "
+ << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
+
+ OS << "\nThroughput Bottlenecks: "
+ << "\n Resource Pressure [ "
+ << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
+ << "% ]";
+
+ if (BPI.PressureIncreaseCycles) {
+ ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+ for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+ unsigned ResourceCycles = Distribution[I];
+ if (ResourceCycles) {
+ double Frequency = (double)ResourceCycles * 100 / TotalCycles;
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
+ OS << "\n - " << PRDesc.Name << " [ "
+ << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
+ }
+ }
+ }
+
+ OS << "\n Data Dependencies: [ "
+ << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
+ OS << "\n - Register Dependencies [ "
+ << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
+ << "% ]";
+ OS << "\n - Memory Dependencies [ "
+ << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
+ << "% ]\n";
+}
+
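All percentages in this report are rounded to the nearest hundredth with the expression floor((X * 100) + 0.5) / 100 before printing. A tiny self-contained sketch of that rounding step (the helper name roundToHundredth is invented for the example):

#include <cmath>
#include <cstdio>

// Mirrors the rounding expression used by printBottleneckHints above.
static double roundToHundredth(double X) {
  return std::floor((X * 100) + 0.5) / 100;
}

int main() {
  std::printf("%.2f%%\n", roundToHundredth(40.756)); // prints 40.76%
  return 0;
}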
+void BottleneckAnalysis::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ printBottleneckHints(TempStream);
+ TempStream.flush();
+ OS << Buffer;
+ printCriticalSequence(OS);
+}
+
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h
new file mode 100644
index 0000000000..cd5af0afcf
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -0,0 +1,348 @@
+//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the bottleneck analysis view.
+///
+/// This view internally observes backend pressure increase events in order to
+/// identify problematic data dependencies and processor resource interferences.
+///
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
+///
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks:
+/// Resource Pressure [ 39.34% ]
+/// - JFPA [ 39.34% ]
+/// - JFPU0 [ 39.34% ]
+/// Data Dependencies: [ 1.42% ]
+/// - Register Dependencies [ 1.42% ]
+/// - Memory Dependencies [ 0.00% ]
+///
+/// According to the example, backend pressure increased during 40.76% of the
+/// simulated cycles. In particular, the major cause of backend pressure
+/// increases was contention on the floating point adder JFPA, which is
+/// accessible from pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and
+/// printing the most "critical" sequence of dependent instructions according to
+/// the simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+/// Instruction Dependency Information
+/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// | < loop carried >
+/// |
+/// | 0. vmulps %xmm0, %xmm0, %xmm2
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3
+/// |
+/// | < loop carried >
+/// |
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+///
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state
+/// transitions and backend pressure increase events. Edges are internally
+/// ranked based on their "criticality". A dependency is considered to be
+/// critical if it takes a long time to execute, and if it contributes to
+/// backend pressure increases. Criticality is internally measured in terms of
+/// cycles; it is computed for every edge in the graph as a function of the edge
+/// latency and the number of backend pressure increase cycles contributed by
+/// that edge.
+///
+/// At the end of the simulation, costs are propagated to nodes through the
+/// edges of the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as the
+/// critical sequence.
+//
+//===----------------------------------------------------------------------===//
+
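The critical-sequence computation described above amounts to a longest-path propagation over an acyclic dependency graph. The following standalone sketch shows only that propagation step; it is a simplification (the real DependencyGraph also records a critical predecessor per node and per-edge frequencies), and the Edge/longestPathCost names are invented for the example:

#include <algorithm>
#include <cstdint>
#include <queue>
#include <vector>

struct Edge {
  unsigned To;
  uint64_t Cost;
};

// Propagate costs from the root set (nodes with no predecessors) and return
// the cost of the most expensive path in the DAG.
static uint64_t longestPathCost(const std::vector<std::vector<Edge>> &Adj) {
  std::vector<unsigned> InDegree(Adj.size(), 0);
  for (const std::vector<Edge> &Out : Adj)
    for (const Edge &E : Out)
      ++InDegree[E.To];

  std::vector<uint64_t> Cost(Adj.size(), 0);
  std::queue<unsigned> Worklist;
  for (unsigned I = 0, E = (unsigned)Adj.size(); I < E; ++I)
    if (!InDegree[I])
      Worklist.push(I);

  uint64_t Best = 0;
  while (!Worklist.empty()) {
    unsigned N = Worklist.front();
    Worklist.pop();
    Best = std::max(Best, Cost[N]);
    for (const Edge &E : Adj[N]) {
      Cost[E.To] = std::max(Cost[E.To], Cost[N] + E.Cost);
      if (--InDegree[E.To] == 0)
        Worklist.push(E.To);
    }
  }
  return Best;
}

int main() {
  // 0 -> 1 (cost 3), 0 -> 2 (cost 1), 1 -> 2 (cost 2): longest path costs 5.
  std::vector<std::vector<Edge>> Adj = {{{1, 3}, {2, 1}}, {{2, 2}}, {}};
  return longestPathCost(Adj) == 5 ? 0 : 1;
}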
+#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+class PressureTracker {
+ const MCSchedModel &SM;
+
+ // Resource pressure distribution. There is an element for every processor
+ // resource declared by the scheduling model. Each quantity is a number of cycles.
+ SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector correlates processor resource IDs with processor
+ // resource masks. There is exactly one element for each processor resource
+ // declared by the scheduling model.
+ SmallVector<uint64_t, 4> ProcResID2Mask;
+
+ // Maps processor resource state indices (returned by calls to
+ // `getResourceStateIndex(Mask)`) to processor resource identifiers.
+ SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+ // Maps Processor Resource identifiers to ResourceUsers indices.
+ SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+ // Identifies the last user of a processor resource unit.
+ // This vector is updated on every instruction issued event.
+ // There is one entry for every processor resource unit declared by the
+ // processor model. An all_ones value is treated like an invalid instruction
+ // identifier.
+ using User = std::pair<unsigned, unsigned>;
+ SmallVector<User, 4> ResourceUsers;
+
+ struct InstructionPressureInfo {
+ unsigned RegisterPressureCycles;
+ unsigned MemoryPressureCycles;
+ unsigned ResourcePressureCycles;
+ };
+ DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+ void updateResourcePressureDistribution(uint64_t CumulativeMask);
+
+ User getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+ unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
+ return ResourceUsers[Index + UnitID];
+ }
+
+public:
+ PressureTracker(const MCSchedModel &Model);
+
+ ArrayRef<unsigned> getResourcePressureDistribution() const {
+ return ResourcePressureDistribution;
+ }
+
+ void getResourceUsers(uint64_t ResourceMask,
+ SmallVectorImpl<User> &Users) const;
+
+ unsigned getRegisterPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.RegisterPressureCycles;
+ }
+
+ unsigned getMemoryPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.MemoryPressureCycles;
+ }
+
+ unsigned getResourcePressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.ResourcePressureCycles;
+ }
+
+ const char *resolveResourceName(uint64_t ResourceMask) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ return PRDesc.Name;
+ }
+
+ void onInstructionDispatched(unsigned IID);
+ void onInstructionExecuted(unsigned IID);
+
+ void handlePressureEvent(const HWPressureEvent &Event);
+ void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
+};
+
+// A dependency edge.
+struct DependencyEdge {
+ enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
+
+ // Dependency edge descriptor.
+ //
+ // It specifies the dependency type, as well as the edge cost in cycles.
+ struct Dependency {
+ DependencyType Type;
+ uint64_t ResourceOrRegID;
+ uint64_t Cost;
+ };
+ Dependency Dep;
+
+ unsigned FromIID;
+ unsigned ToIID;
+
+ // Used by the bottleneck analysis to compute the interference
+ // probability for processor resources.
+ unsigned Frequency;
+};
+
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependencies based on their impact on runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop-carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. The graph is kept acyclic mainly because doing so
+// simplifies the algorithm that computes the critical sequence.
+class DependencyGraph {
+ struct DGNode {
+ unsigned NumPredecessors;
+ unsigned NumVisitedPredecessors;
+ uint64_t Cost;
+ unsigned Depth;
+
+ DependencyEdge CriticalPredecessor;
+ SmallVector<DependencyEdge, 8> OutgoingEdges;
+ };
+ SmallVector<DGNode, 16> Nodes;
+
+ DependencyGraph(const DependencyGraph &) = delete;
+ DependencyGraph &operator=(const DependencyGraph &) = delete;
+
+ void addDependency(unsigned From, unsigned To,
+ DependencyEdge::Dependency &&DE);
+
+ void pruneEdges(unsigned Iterations);
+ void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
+ unsigned Iterations);
+
+#ifndef NDEBUG
+ void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
+ MCInstPrinter &MCIP) const;
+#endif
+
+public:
+ DependencyGraph(unsigned Size) : Nodes(Size) {}
+
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost});
+ }
+
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost});
+ }
+
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
+ }
+
+ // Called by the bottleneck analysis at the end of simulation to propagate
+ // costs through the edges of the graph, and compute a critical path.
+ void finalizeGraph(unsigned Iterations) {
+ SmallVector<unsigned, 16> RootSet;
+ pruneEdges(Iterations);
+ initializeRootSet(RootSet);
+ propagateThroughEdges(RootSet, Iterations);
+ }
+
+ // Returns a sequence of edges representing the critical sequence based on the
+ // simulated run. It assumes that the graph has already been finalized (i.e.
+ // method `finalizeGraph()` has already been called on this graph).
+ void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
+#ifndef NDEBUG
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
+#endif
+};
+
+/// A view that collects and prints a few performance numbers.
+class BottleneckAnalysis : public InstructionView {
+ PressureTracker Tracker;
+ DependencyGraph DG;
+
+ unsigned Iterations;
+ unsigned TotalCycles;
+
+ bool PressureIncreasedBecauseOfResources;
+ bool PressureIncreasedBecauseOfRegisterDependencies;
+ bool PressureIncreasedBecauseOfMemoryDependencies;
+ // True if throughput was affected by dispatch stalls.
+ bool SeenStallCycles;
+
+ struct BackPressureInfo {
+ // Cycles where backpressure increased.
+ unsigned PressureIncreaseCycles;
+ // Cycles where backpressure increased because of pipeline pressure.
+ unsigned ResourcePressureCycles;
+ // Cycles where backpressure increased because of data dependencies.
+ unsigned DataDependencyCycles;
+ // Cycles where backpressure increased because of register dependencies.
+ unsigned RegisterDependencyCycles;
+ // Cycles where backpressure increased because of memory dependencies.
+ unsigned MemoryDependencyCycles;
+ };
+ BackPressureInfo BPI;
+
+ // Used to populate the dependency graph DG.
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy);
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
+
+ void printInstruction(formatted_raw_ostream &FOS, const MCInst &MCI,
+ bool UseDifferentColor = false) const;
+
+ // Prints a bottleneck message to OS.
+ void printBottleneckHints(raw_ostream &OS) const;
+ void printCriticalSequence(raw_ostream &OS) const;
+
+public:
+ BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ ArrayRef<MCInst> Sequence, unsigned Iterations);
+
+ void onCycleEnd() override;
+ void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
+ void onEvent(const HWPressureEvent &Event) override;
+ void onEvent(const HWInstructionEvent &Event) override;
+
+ void printView(raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "BottleneckAnalysis"; }
+ bool isSerializable() const override { return false; }
+
+#ifndef NDEBUG
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp
new file mode 100644
index 0000000000..3dc17c8754
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -0,0 +1,98 @@
+//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the DispatchStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/DispatchStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+void DispatchStatistics::onEvent(const HWStallEvent &Event) {
+ if (Event.Type < HWStallEvent::LastGenericEvent)
+ HWStalls[Event.Type]++;
+}
+
+void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ NumDispatched += DE.MicroOpcodes;
+}
+
+void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nDispatch Logic - "
+ << "number of cycles where we saw N micro opcodes dispatched:\n";
+ TempStream << "[# dispatched], [# cycles]\n";
+ for (const std::pair<const unsigned, unsigned> &Entry :
+ DispatchGroupSizePerCycle) {
+ double Percentage = ((double)Entry.second / NumCycles) * 100.0;
+ TempStream << " " << Entry.first << ", " << Entry.second
+ << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
+ << "%)\n";
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
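The dispatch histogram above is just a std::map from dispatch-group size to the number of cycles that dispatched that many uOps. A self-contained sketch of how such a histogram accumulates and prints (the sample data is made up for illustration):

#include <cstdio>
#include <map>

int main() {
  // One counter per dispatch-group size N: cycles that dispatched N uOps.
  std::map<unsigned, unsigned> DispatchGroupSizePerCycle;
  const unsigned PerCycleUOps[] = {0, 2, 2, 4, 0, 2};
  const unsigned NumCycles = sizeof(PerCycleUOps) / sizeof(PerCycleUOps[0]);
  for (unsigned N : PerCycleUOps)
    ++DispatchGroupSizePerCycle[N];

  std::printf("[# dispatched], [# cycles]\n");
  for (const auto &Entry : DispatchGroupSizePerCycle)
    std::printf(" %u, %u  (%.1f%%)\n", Entry.first, Entry.second,
                (double)Entry.second / NumCycles * 100.0);
  return 0;
}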
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+ unsigned NumCycles) {
+ if (!NumStalls) {
+ OS << NumStalls;
+ return;
+ }
+
+ double Percentage = ((double)NumStalls / NumCycles) * 100.0;
+ OS << NumStalls << " ("
+ << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)";
+}
+
+void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+ SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+ SS << "RAT - Register unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles);
+ SS << "\nRCU - Retire tokens unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles);
+ SS << "\nSCHEDQ - Scheduler full: ";
+ printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles);
+ SS << "\nLQ - Load queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles);
+ SS << "\nSQ - Store queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles);
+ SS << "\nGROUP - Static restrictions on the dispatch group: ";
+ printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles);
+ SS << "\nUSH - Uncategorised Structural Hazard: ";
+ printStalls(SS, HWStalls[HWStallEvent::CustomBehaviourStall], NumCycles);
+ SS << '\n';
+ SS.flush();
+ OS << Buffer;
+}
+
+json::Value DispatchStatistics::toJSON() const {
+ json::Object JO({{"RAT", HWStalls[HWStallEvent::RegisterFileStall]},
+ {"RCU", HWStalls[HWStallEvent::RetireControlUnitStall]},
+ {"SCHEDQ", HWStalls[HWStallEvent::SchedulerQueueFull]},
+ {"LQ", HWStalls[HWStallEvent::LoadQueueFull]},
+ {"SQ", HWStalls[HWStallEvent::StoreQueueFull]},
+ {"GROUP", HWStalls[HWStallEvent::DispatchGroupStall]},
+ {"USH", HWStalls[HWStallEvent::CustomBehaviourStall]}});
+ return JO;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h
new file mode 100644
index 0000000000..cfd12691c0
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h
@@ -0,0 +1,87 @@
+//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a view that prints a few statistics related to the
+/// dispatch logic. It collects and analyzes instruction dispatch events as
+/// well as static/dynamic dispatch stall events.
+///
+/// Example:
+/// ========
+///
+/// Dynamic Dispatch Stall Cycles:
+/// RAT - Register unavailable: 0
+/// RCU - Retire tokens unavailable: 0
+/// SCHEDQ - Scheduler full: 42
+/// LQ - Load queue full: 0
+/// SQ - Store queue full: 0
+/// GROUP - Static restrictions on the dispatch group: 0
+///
+///
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+/// [# dispatched], [# cycles]
+/// 0, 15 (11.5%)
+/// 2, 4 (3.1%)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+class DispatchStatistics : public View {
+ unsigned NumDispatched;
+ unsigned NumCycles;
+
+ // Counts dispatch stall events caused by unavailability of resources. There
+ // is one counter for every generic stall kind (see class HWStallEvent).
+ llvm::SmallVector<unsigned, 8> HWStalls;
+
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram DispatchGroupSizePerCycle;
+
+ void updateHistograms() {
+ DispatchGroupSizePerCycle[NumDispatched]++;
+ NumDispatched = 0;
+ }
+
+ void printDispatchHistogram(llvm::raw_ostream &OS) const;
+
+ void printDispatchStalls(llvm::raw_ostream &OS) const;
+
+public:
+ DispatchStatistics()
+ : NumDispatched(0), NumCycles(0),
+ HWStalls(HWStallEvent::LastGenericEvent) {}
+
+ void onEvent(const HWStallEvent &Event) override;
+
+ void onEvent(const HWInstructionEvent &Event) override;
+
+ void onCycleBegin() override { NumCycles++; }
+
+ void onCycleEnd() override { updateHistograms(); }
+
+ void printView(llvm::raw_ostream &OS) const override {
+ printDispatchStalls(OS);
+ printDispatchHistogram(OS);
+ }
+ StringRef getNameAsString() const override { return "DispatchStatistics"; }
+ json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp
new file mode 100644
index 0000000000..257fdca8cb
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -0,0 +1,177 @@
+//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstructionInfoView API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/InstructionInfoView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+void InstructionInfoView::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+
+ ArrayRef<llvm::MCInst> Source = getSource();
+ if (!Source.size())
+ return;
+
+ IIVDVec IIVD(Source.size());
+ collectData(IIVD);
+
+ TempStream << "\n\nInstruction Info:\n";
+ TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
+ << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
+ if (PrintBarriers) {
+ TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n";
+ }
+ if (PrintEncodings) {
+ if (PrintBarriers) {
+ TempStream << "[9]: Encoding Size\n";
+ TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
+ << "[9] Encodings: Instructions:\n";
+ } else {
+ TempStream << "[7]: Encoding Size\n";
+ TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
+ << "Encodings: Instructions:\n";
+ }
+ } else {
+ if (PrintBarriers) {
+ TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
+ << "Instructions:\n";
+ } else {
+ TempStream << "\n[1] [2] [3] [4] [5] [6] "
+ << "Instructions:\n";
+ }
+ }
+
+ int Index = 0;
+ for (const auto &I : enumerate(zip(IIVD, Source))) {
+ const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value());
+
+ TempStream << ' ' << IIVDEntry.NumMicroOpcodes << " ";
+ if (IIVDEntry.NumMicroOpcodes < 10)
+ TempStream << " ";
+ else if (IIVDEntry.NumMicroOpcodes < 100)
+ TempStream << ' ';
+ TempStream << IIVDEntry.Latency << " ";
+ if (IIVDEntry.Latency < 10)
+ TempStream << " ";
+ else if (IIVDEntry.Latency < 100)
+ TempStream << ' ';
+
+ if (IIVDEntry.RThroughput) {
+ double RT = *IIVDEntry.RThroughput;
+ TempStream << format("%.2f", RT) << ' ';
+ if (RT < 10.0)
+ TempStream << " ";
+ else if (RT < 100.0)
+ TempStream << ' ';
+ } else {
+ TempStream << " - ";
+ }
+ TempStream << (IIVDEntry.mayLoad ? " * " : " ");
+ TempStream << (IIVDEntry.mayStore ? " * " : " ");
+ TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U " : " ");
+
+ if (PrintBarriers) {
+ TempStream << (LoweredInsts[Index]->isALoadBarrier() ? " * "
+ : " ");
+ TempStream << (LoweredInsts[Index]->isAStoreBarrier() ? " * "
+ : " ");
+ }
+
+ if (PrintEncodings) {
+ StringRef Encoding(CE.getEncoding(I.index()));
+ unsigned EncodingSize = Encoding.size();
+ TempStream << " " << EncodingSize
+ << (EncodingSize < 10 ? " " : " ");
+ TempStream.flush();
+ formatted_raw_ostream FOS(TempStream);
+ for (unsigned i = 0, e = Encoding.size(); i != e; ++i)
+ FOS << format("%02x ", (uint8_t)Encoding[i]);
+ FOS.PadToColumn(30);
+ FOS.flush();
+ }
+
+ const MCInst &Inst = std::get<1>(I.value());
+ TempStream << printInstructionString(Inst) << '\n';
+ ++Index;
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+void InstructionInfoView::collectData(
+ MutableArrayRef<InstructionInfoViewData> IIVD) const {
+ const llvm::MCSubtargetInfo &STI = getSubTargetInfo();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (const auto I : zip(getSource(), IIVD)) {
+ const MCInst &Inst = std::get<0>(I);
+ InstructionInfoViewData &IIVDEntry = std::get<1>(I);
+ const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+
+ // Obtain the scheduling class information from the instruction.
+ unsigned SchedClassID = MCDesc.getSchedClass();
+ unsigned CPUID = SM.getProcessorID();
+
+ // Try to resolve variant scheduling classes.
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+ SchedClassID =
+ STI.resolveVariantSchedClass(SchedClassID, &Inst, &MCII, CPUID);
+
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+ IIVDEntry.NumMicroOpcodes = SCDesc.NumMicroOps;
+ IIVDEntry.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // Add extra latency due to delays in the forwarding data paths.
+ IIVDEntry.Latency += MCSchedModel::getForwardingDelayCycles(
+ STI.getReadAdvanceEntries(SCDesc));
+ IIVDEntry.RThroughput = MCSchedModel::getReciprocalThroughput(STI, SCDesc);
+ IIVDEntry.mayLoad = MCDesc.mayLoad();
+ IIVDEntry.mayStore = MCDesc.mayStore();
+ IIVDEntry.hasUnmodeledSideEffects = MCDesc.hasUnmodeledSideEffects();
+ }
+}
+
+// Construct a JSON object from a single InstructionInfoViewData object.
+json::Object
+InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const {
+ json::Object JO({{"NumMicroOpcodes", IIVD.NumMicroOpcodes},
+ {"Latency", IIVD.Latency},
+ {"mayLoad", IIVD.mayLoad},
+ {"mayStore", IIVD.mayStore},
+ {"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}});
+ JO.try_emplace("RThroughput", IIVD.RThroughput.value_or(0.0));
+ return JO;
+}
+
+json::Value InstructionInfoView::toJSON() const {
+ ArrayRef<llvm::MCInst> Source = getSource();
+ if (!Source.size())
+ return json::Value(0);
+
+ IIVDVec IIVD(Source.size());
+ collectData(IIVD);
+
+ json::Array InstInfo;
+ for (const auto &I : enumerate(IIVD)) {
+ const InstructionInfoViewData &IIVDEntry = I.value();
+ json::Object JO = toJSON(IIVDEntry);
+ JO.try_emplace("Instruction", (unsigned)I.index());
+ InstInfo.push_back(std::move(JO));
+ }
+ return json::Object({{"InstructionList", json::Value(std::move(InstInfo))}});
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h
new file mode 100644
index 0000000000..bddd01a086
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h
@@ -0,0 +1,93 @@
+//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the instruction info view.
+///
+/// The goal of the instruction info view is to print the latency and reciprocal
+/// throughput information for every instruction in the input sequence.
+/// This section also reports extra information related to the number of micro
+/// opcodes, and opcode properties (i.e. 'MayLoad', 'MayStore', 'HasSideEffects').
+///
+/// Example:
+///
+/// Instruction Info:
+/// [1]: #uOps
+/// [2]: Latency
+/// [3]: RThroughput
+/// [4]: MayLoad
+/// [5]: MayStore
+/// [6]: HasSideEffects
+///
+/// [1] [2] [3] [4] [5] [6] Instructions:
+/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
+/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
+/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+/// A view that prints out generic instruction information.
+class InstructionInfoView : public InstructionView {
+ const llvm::MCInstrInfo &MCII;
+ CodeEmitter &CE;
+ bool PrintEncodings;
+ bool PrintBarriers;
+ using UniqueInst = std::unique_ptr<Instruction>;
+ ArrayRef<UniqueInst> LoweredInsts;
+
+ struct InstructionInfoViewData {
+ unsigned NumMicroOpcodes = 0;
+ unsigned Latency = 0;
+ std::optional<double> RThroughput = 0.0;
+ bool mayLoad = false;
+ bool mayStore = false;
+ bool hasUnmodeledSideEffects = false;
+ };
+ using IIVDVec = SmallVector<InstructionInfoViewData, 16>;
+
+ /// Place the data into the array of InstructionInfoViewData IIVD.
+ void collectData(MutableArrayRef<InstructionInfoViewData> IIVD) const;
+
+public:
+ InstructionInfoView(const llvm::MCSubtargetInfo &ST,
+ const llvm::MCInstrInfo &II, CodeEmitter &C,
+ bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
+ llvm::MCInstPrinter &IP,
+ ArrayRef<UniqueInst> LoweredInsts,
+ bool ShouldPrintBarriers)
+ : InstructionView(ST, IP, S), MCII(II), CE(C),
+ PrintEncodings(ShouldPrintEncodings),
+ PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts) {}
+
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "InstructionInfoView"; }
+ json::Value toJSON() const override;
+ json::Object toJSON(const InstructionInfoViewData &IIVD) const;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp
new file mode 100644
index 0000000000..3b174a0649
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp
@@ -0,0 +1,43 @@
+//===----------------------- InstructionView.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the member functions of the class InstructionView.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/InstructionView.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+namespace mca {
+
+InstructionView::~InstructionView() = default;
+
+StringRef
+InstructionView::printInstructionString(const llvm::MCInst &MCI) const {
+ InstructionString = "";
+ MCIP.printInst(&MCI, 0, "", STI, InstrStream);
+ InstrStream.flush();
+ // Remove any tabs or spaces at the beginning of the instruction.
+ return StringRef(InstructionString).ltrim();
+}
+
+json::Value InstructionView::toJSON() const {
+ json::Array SourceInfo;
+ for (const auto &MCI : getSource()) {
+ StringRef Instruction = printInstructionString(MCI);
+ SourceInfo.push_back(Instruction.str());
+ }
+ return SourceInfo;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h
new file mode 100644
index 0000000000..ae57246fc3
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h
@@ -0,0 +1,60 @@
+//===----------------------- InstructionView.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for Views that examine and reference
+/// a sequence of machine instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+
+#include "llvm/MCA/View.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+class MCInstPrinter;
+
+namespace mca {
+
+// The base class for views that deal with individual machine instructions.
+class InstructionView : public View {
+ const llvm::MCSubtargetInfo &STI;
+ llvm::MCInstPrinter &MCIP;
+ llvm::ArrayRef<llvm::MCInst> Source;
+
+ mutable std::string InstructionString;
+ mutable raw_string_ostream InstrStream;
+
+public:
+ void printView(llvm::raw_ostream &) const override {}
+ InstructionView(const llvm::MCSubtargetInfo &STI,
+ llvm::MCInstPrinter &Printer, llvm::ArrayRef<llvm::MCInst> S)
+ : STI(STI), MCIP(Printer), Source(S), InstrStream(InstructionString) {}
+
+ virtual ~InstructionView();
+
+ StringRef getNameAsString() const override { return "Instructions"; }
+
+ // Return a reference to a string representing a given machine instruction.
+ // The result should be used or copied before the next call to
+ // printInstructionString() as it will overwrite the previous result.
+ StringRef printInstructionString(const llvm::MCInst &MCI) const;
+ const llvm::MCSubtargetInfo &getSubTargetInfo() const { return STI; }
+
+ llvm::MCInstPrinter &getInstPrinter() const { return MCIP; }
+ llvm::ArrayRef<llvm::MCInst> getSource() const { return Source; }
+
+ json::Value toJSON() const override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp
new file mode 100644
index 0000000000..4ef8053bff
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -0,0 +1,170 @@
+//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RegisterFileStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RegisterFileStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
+ : STI(sti) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ RegisterFileUsage RFUEmpty = {0, 0, 0};
+ MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
+ if (!SM.hasExtraProcessorInfo()) {
+ // Assume a single register file.
+ PRFUsage.emplace_back(RFUEmpty);
+ MoveElimInfo.emplace_back(MEIEmpty);
+ return;
+ }
+
+ // Initialize a RegisterFileUsage for every user defined register file, plus
+ // the default register file which is always at index #0.
+ const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
+ // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
+ // be skipped. If there are no user defined register files, then reserve a
+ // single entry for the default register file at index #0.
+ unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
+
+ PRFUsage.resize(NumRegFiles);
+ std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
+
+ MoveElimInfo.resize(NumRegFiles);
+ std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
+}
+
+void RegisterFileStatistics::updateRegisterFileUsage(
+ ArrayRef<unsigned> UsedPhysRegs) {
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
+ RegisterFileUsage &RFU = PRFUsage[I];
+ unsigned NumUsedPhysRegs = UsedPhysRegs[I];
+ RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
+ RFU.TotalMappings += NumUsedPhysRegs;
+ RFU.MaxUsedMappings =
+ std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
+ }
+}
+
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+ if (!Inst.isOptimizableMove())
+ return;
+
+ if (Inst.getDefs().size() != Inst.getUses().size())
+ return;
+
+ for (size_t I = 0, E = Inst.getDefs().size(); I < E; ++I) {
+ const WriteState &WS = Inst.getDefs()[I];
+ const ReadState &RS = Inst.getUses()[E - (I + 1)];
+
+ MoveEliminationInfo &Info =
+ MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
+ Info.TotalMoveEliminationCandidates++;
+ if (WS.isEliminated())
+ Info.CurrentMovesEliminated++;
+ if (WS.isWriteZero() && RS.isReadZero())
+ Info.TotalMovesThatPropagateZero++;
+ }
+}
+
+void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
+ switch (Event.Type) {
+ default:
+ break;
+ case HWInstructionEvent::Retired: {
+ const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
+ PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
+ break;
+ }
+ case HWInstructionEvent::Dispatched: {
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ updateRegisterFileUsage(DE.UsedPhysRegs);
+ updateMoveElimInfo(*DE.IR.getInstruction());
+ }
+ }
+}
+
+void RegisterFileStatistics::onCycleEnd() {
+ for (MoveEliminationInfo &MEI : MoveElimInfo) {
+ unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
+ CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
+ MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
+ MEI.CurrentMovesEliminated = 0;
+ }
+}
+
+void RegisterFileStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+
+ TempStream << "\n\nRegister File statistics:";
+ const RegisterFileUsage &GlobalUsage = PRFUsage[0];
+ TempStream << "\nTotal number of mappings created: "
+ << GlobalUsage.TotalMappings;
+ TempStream << "\nMax number of mappings used: "
+ << GlobalUsage.MaxUsedMappings << '\n';
+
+ for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+ const RegisterFileUsage &RFU = PRFUsage[I];
+ // Obtain the register file descriptor from the scheduling model.
+ assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+ "Unable to find register file info!");
+ const MCExtraProcessorInfo &PI =
+ STI.getSchedModel().getExtraProcessorInfo();
+ assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
+ const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+ // Skip invalid register files.
+ if (!RFDesc.NumPhysRegs)
+ continue;
+
+ TempStream << "\n* Register File #" << I;
+ TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+ TempStream << "\n Number of physical registers: ";
+ if (!RFDesc.NumPhysRegs)
+ TempStream << "unbounded";
+ else
+ TempStream << RFDesc.NumPhysRegs;
+ TempStream << "\n Total number of mappings created: "
+ << RFU.TotalMappings;
+ TempStream << "\n Max number of mappings used: "
+ << RFU.MaxUsedMappings << '\n';
+ const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+ if (MEI.TotalMoveEliminationCandidates) {
+ TempStream << " Number of optimizable moves: "
+ << MEI.TotalMoveEliminationCandidates;
+ double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+ MEI.TotalMoveEliminationCandidates *
+ 100.0;
+ double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+ MEI.TotalMoveEliminationCandidates * 100.0;
+ TempStream << "\n Number of moves eliminated: "
+ << MEI.TotalMovesEliminated << " "
+ << format("(%.1f%%)",
+ floor((EliminatedMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Number of zero moves: "
+ << MEI.TotalMovesThatPropagateZero << " "
+ << format("(%.1f%%)",
+ floor((ZeroMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Max moves eliminated per cycle: "
+ << MEI.MaxMovesEliminatedPerCycle << '\n';
+ }
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h
new file mode 100644
index 0000000000..3de2a22ac3
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -0,0 +1,84 @@
+//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This view collects and prints register file usage statistics.
+///
+/// Example (-mcpu=btver2):
+/// ========================
+///
+/// Register File statistics:
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+///
+/// * Register File #1 -- FpuPRF:
+/// Number of physical registers: 72
+/// Total number of mappings created: 0
+/// Max number of mappings used: 0
+/// Number of optimizable moves: 200
+/// Number of moves eliminated: 200 (100.0%)
+/// Number of zero moves: 200 (100.0%)
+/// Max moves eliminated per cycle: 2
+///
+/// * Register File #2 -- IntegerPRF:
+/// Number of physical registers: 64
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+
+namespace llvm {
+namespace mca {
+
+class RegisterFileStatistics : public View {
+ const llvm::MCSubtargetInfo &STI;
+
+ // Used to track the number of physical registers used in a register file.
+ struct RegisterFileUsage {
+ unsigned TotalMappings;
+ unsigned MaxUsedMappings;
+ unsigned CurrentlyUsedMappings;
+ };
+
+ struct MoveEliminationInfo {
+ unsigned TotalMoveEliminationCandidates;
+ unsigned TotalMovesEliminated;
+ unsigned TotalMovesThatPropagateZero;
+ unsigned MaxMovesEliminatedPerCycle;
+ unsigned CurrentMovesEliminated;
+ };
+
+ // There is one entry for each register file implemented by the processor.
+ llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+ llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
+
+ void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
+ void updateMoveElimInfo(const Instruction &Inst);
+
+public:
+ RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
+
+ void onCycleEnd() override;
+ void onEvent(const HWInstructionEvent &Event) override;
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override {
+ return "RegisterFileStatistics";
+ }
+ bool isSerializable() const override { return false; }
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp
new file mode 100644
index 0000000000..77b3ba0b7c
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -0,0 +1,200 @@
+//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods in the ResourcePressureView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/ResourcePressureView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S)
+ : InstructionView(sti, Printer, S), LastInstructionIdx(0) {
+ // Populate the map of resource descriptors.
+ unsigned R2VIndex = 0;
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ // Skip groups and invalid resources with zero units.
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits)
+ continue;
+
+ Resource2VecIndex.insert(std::pair<unsigned, unsigned>(I, R2VIndex));
+ R2VIndex += ProcResource.NumUnits;
+ }
+
+ NumResourceUnits = R2VIndex;
+ ResourceUsage.resize(NumResourceUnits * (getSource().size() + 1));
+ std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0);
+}
+
+void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ LastInstructionIdx = Event.IR.getSourceIndex();
+ return;
+ }
+
+ // We're only interested in Issue events.
+ if (Event.Type != HWInstructionEvent::Issued)
+ return;
+
+ const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
+ for (const std::pair<ResourceRef, ResourceCycles> &Use :
+ IssueEvent.UsedResources) {
+ const ResourceRef &RR = Use.first;
+ assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
+ unsigned R2VIndex = Resource2VecIndex[RR.first];
+ R2VIndex += countTrailingZeros(RR.second);
+ ResourceUsage[R2VIndex + NumResourceUnits * SourceIdx] += Use.second;
+ ResourceUsage[R2VIndex + NumResourceUnits * Source.size()] += Use.second;
+ }
+}
+
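ResourceUsage above is a flat matrix: one row per instruction plus a trailing row of running totals, indexed as Unit + NumResourceUnits * Inst. A standalone sketch of that layout (the numbers and the helper lambda are illustrative only):

#include <cstdio>
#include <vector>

int main() {
  // Flat matrix with one row per instruction and one extra totals row;
  // element (Inst, Unit) lives at index Unit + NumUnits * Inst.
  const unsigned NumUnits = 3, NumInsts = 2;
  std::vector<double> Usage(NumUnits * (NumInsts + 1), 0.0);

  auto addCycles = [&](unsigned Inst, unsigned Unit, double Cycles) {
    Usage[Unit + NumUnits * Inst] += Cycles;     // per-instruction cell
    Usage[Unit + NumUnits * NumInsts] += Cycles; // totals row
  };

  addCycles(0, 1, 1.0); // instruction #0 spends 1cy on unit #1
  addCycles(1, 2, 2.0); // instruction #1 spends 2cy on unit #2

  const unsigned Executions = 1; // number of simulated iterations
  for (unsigned U = 0; U < NumUnits; ++U)
    std::printf("[%u] %.2f\n", U, Usage[U + NumUnits * NumInsts] / Executions);
  return 0;
}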
+static void printColumnNames(formatted_raw_ostream &OS,
+ const MCSchedModel &SM) {
+ unsigned Column = OS.getColumn();
+ for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds();
+ I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ // Skip groups and invalid resources with zero units.
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits)
+ continue;
+
+ for (unsigned J = 0; J < NumUnits; ++J) {
+ Column += 7;
+ OS << "[" << ResourceIndex;
+ if (NumUnits > 1)
+ OS << '.' << J;
+ OS << ']';
+ OS.PadToColumn(Column);
+ }
+
+ ResourceIndex++;
+ }
+}
+
+static void printResourcePressure(formatted_raw_ostream &OS, double Pressure,
+ unsigned Col) {
+ if (!Pressure || Pressure < 0.005) {
+ OS << " - ";
+ } else {
+ // Round the value to the nearest hundredth and then print it.
+ OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100);
+ }
+ OS.PadToColumn(Col);
+}
+
+void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ formatted_raw_ostream FOS(TempStream);
+
+ FOS << "\n\nResources:\n";
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+ for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds();
+ I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ // Skip groups and invalid resources with zero units.
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits)
+ continue;
+
+ for (unsigned J = 0; J < NumUnits; ++J) {
+ FOS << '[' << ResourceIndex;
+ if (NumUnits > 1)
+ FOS << '.' << J;
+ FOS << ']';
+ FOS.PadToColumn(6);
+ FOS << "- " << ProcResource.Name << '\n';
+ }
+
+ ResourceIndex++;
+ }
+
+ FOS << "\n\nResource pressure per iteration:\n";
+ FOS.flush();
+ printColumnNames(FOS, SM);
+ FOS << '\n';
+ FOS.flush();
+
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) {
+ double Usage = ResourceUsage[I + Source.size() * E];
+ printResourcePressure(FOS, Usage / Executions, (I + 1) * 7);
+ }
+
+ FOS.flush();
+ OS << Buffer;
+}
+
+void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ formatted_raw_ostream FOS(TempStream);
+
+ FOS << "\n\nResource pressure by instruction:\n";
+ printColumnNames(FOS, getSubTargetInfo().getSchedModel());
+ FOS << "Instructions:\n";
+
+ unsigned InstrIndex = 0;
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (const MCInst &MCI : Source) {
+ unsigned BaseEltIdx = InstrIndex * NumResourceUnits;
+ for (unsigned J = 0; J < NumResourceUnits; ++J) {
+ double Usage = ResourceUsage[J + BaseEltIdx];
+ printResourcePressure(FOS, Usage / Executions, (J + 1) * 7);
+ }
+
+ FOS << printInstructionString(MCI) << '\n';
+ FOS.flush();
+ OS << Buffer;
+ Buffer = "";
+
+ ++InstrIndex;
+ }
+}
+
+json::Value ResourcePressureView::toJSON() const {
+ // We're dumping the instructions and the ResourceUsage array.
+ json::Array ResourcePressureInfo;
+
+ // The ResourceUsage matrix is sparse, so we only consider
+ // non-zero values.
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (const auto &R : enumerate(ResourceUsage)) {
+ const ResourceCycles &RU = R.value();
+ if (RU.getNumerator() == 0)
+ continue;
+ unsigned InstructionIndex = R.index() / NumResourceUnits;
+ unsigned ResourceIndex = R.index() % NumResourceUnits;
+ double Usage = RU / Executions;
+ ResourcePressureInfo.push_back(
+ json::Object({{"InstructionIndex", InstructionIndex},
+ {"ResourceIndex", ResourceIndex},
+ {"ResourceUsage", Usage}}));
+ }
+
+ json::Object JO({{"ResourcePressureInfo", std::move(ResourcePressureInfo)}});
+ return JO;
+}
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h
new file mode 100644
index 0000000000..c3993a08c1
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h
@@ -0,0 +1,103 @@
+//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class ResourcePressureView.
+/// Class ResourcePressureView observes hardware events generated by
+/// the Pipeline object and collects statistics related to resource usage at
+/// instruction granularity.
+/// Resource pressure information is then printed out to a stream in the
+/// form of a table like the one from the example below:
+///
+/// Resources:
+/// [0] - JALU0
+/// [1] - JALU1
+/// [2] - JDiv
+/// [3] - JFPM
+/// [4] - JFPU0
+/// [5] - JFPU1
+/// [6] - JLAGU
+/// [7] - JSAGU
+/// [8] - JSTC
+/// [9] - JVIMUL
+///
+/// Resource pressure per iteration:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00
+///
+/// Resource pressure by instruction:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0,
+/// %xmm1
+/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1,
+/// %xmm2
+/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3
+/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3,
+/// %xmm4
+///
+/// In this example, we have AVX code executed on AMD Jaguar (btver2).
+/// Both shuffles and vector floating point add operations on XMM registers have
+/// a reciprocal throughput of 1cy.
+/// Each add is issued to pipeline JFPU0, while each shuffle is issued to
+/// pipeline JFPU1. The overall pressure per iteration is reported by two
+/// tables: the first smaller table is the resource pressure per iteration;
+/// the second table reports resource pressure per instruction. Values are the
+/// average resource cycles consumed by an instruction.
+/// Every vector add from the example uses resource JFPU0 for an average of 1cy
+/// per iteration. Consequently, the resource pressure on JFPU0 is 2cy per
+/// iteration.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+/// This class collects resource pressure statistics and prints all the
+/// collected information as a table to an output stream.
+class ResourcePressureView : public InstructionView {
+ unsigned LastInstructionIdx;
+
+ // Map to quickly obtain the ResourceUsage column index from a processor
+ // resource ID.
+ llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
+
+ // Table of resources used by instructions.
+ std::vector<ResourceCycles> ResourceUsage;
+ unsigned NumResourceUnits;
+
+ void printResourcePressurePerIter(llvm::raw_ostream &OS) const;
+ void printResourcePressurePerInst(llvm::raw_ostream &OS) const;
+
+public:
+ ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+ llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S);
+
+ void onEvent(const HWInstructionEvent &Event) override;
+ void printView(llvm::raw_ostream &OS) const override {
+ printResourcePressurePerIter(OS);
+ printResourcePressurePerInst(OS);
+ }
+ StringRef getNameAsString() const override { return "ResourcePressureView"; }
+ json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
new file mode 100644
index 0000000000..1c40428fb0
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -0,0 +1,91 @@
+//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RetireControlUnitStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RetireControlUnitStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM)
+ : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0),
+ SumOfUsedEntries(0) {
+ TotalROBEntries = SM.MicroOpBufferSize;
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (EPI.ReorderBufferSize)
+ TotalROBEntries = EPI.ReorderBufferSize;
+ }
+}
+
+void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ unsigned NumEntries =
+ static_cast<const HWInstructionDispatchedEvent &>(Event).MicroOpcodes;
+ EntriesInUse += NumEntries;
+ }
+
+ if (Event.Type == HWInstructionEvent::Retired) {
+ unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps;
+ assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!");
+ EntriesInUse -= ReleasedEntries;
+ ++NumRetired;
+ }
+}
+
+void RetireControlUnitStatistics::onCycleEnd() {
+ // Update histogram
+ RetiredPerCycle[NumRetired]++;
+ NumRetired = 0;
+ ++NumCycles;
+ MaxUsedEntries = std::max(MaxUsedEntries, EntriesInUse);
+ SumOfUsedEntries += EntriesInUse;
+}
+
+void RetireControlUnitStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nRetire Control Unit - "
+ << "number of cycles where we saw N instructions retired:\n";
+ TempStream << "[# retired], [# cycles]\n";
+
+ for (const std::pair<const unsigned, unsigned> &Entry : RetiredPerCycle) {
+ TempStream << " " << Entry.first;
+ if (Entry.first < 10)
+ TempStream << ",           ";
+ else
+ TempStream << ",          ";
+ TempStream << Entry.second << " ("
+ << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+ << "%)\n";
+ }
+
+ unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles;
+ double MaxUsagePercentage =
+ ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
+ double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10;
+ double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0;
+ double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10;
+
+ TempStream << "\nTotal ROB Entries: " << TotalROBEntries
+ << "\nMax Used ROB Entries: " << MaxUsedEntries
+ << format(" ( %.1f%% )", NormalizedMaxPercentage)
+ << "\nAverage Used ROB Entries per cy: " << AvgUsage
+ << format(" ( %.1f%% )\n", NormalizedAvgPercentage);
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
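
The histogram and the one-decimal rounding used in RetireControlUnitStatistics::printView
above can be sketched in isolation (a standalone example with made-up retirement counts,
not part of the vendored file):

#include <cmath>
#include <cstdio>
#include <map>

int main() {
  // Made-up trace: instructions retired on each simulated cycle.
  const unsigned Trace[] = {0, 2, 2, 1, 2, 0};
  std::map<unsigned, unsigned> RetiredPerCycle; // retired count -> # of cycles
  unsigned NumCycles = 0;
  for (unsigned Retired : Trace) {
    ++RetiredPerCycle[Retired];
    ++NumCycles;
  }

  for (const auto &Entry : RetiredPerCycle) {
    double Pct = (double)Entry.second / NumCycles * 100.0;
    // Same rounding idiom as above: keep one decimal digit.
    std::printf("%u retired: %u cycles (%.1f%%)\n", Entry.first, Entry.second,
                std::floor(Pct * 10 + 0.5) / 10);
  }
  return 0;
}
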
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h
new file mode 100644
index 0000000000..ed3736c645
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -0,0 +1,64 @@
+//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class RetireControlUnitStatistics: a view that knows how
+/// to print general statistics related to the retire control unit.
+///
+/// Example:
+/// ========
+///
+/// Retire Control Unit - number of cycles where we saw N instructions retired:
+/// [# retired], [# cycles]
+/// 0, 109 (17.9%)
+/// 1, 102 (16.7%)
+/// 2, 399 (65.4%)
+///
+/// Total ROB Entries: 64
+/// Max Used ROB Entries: 35 ( 54.7% )
+/// Average Used ROB Entries per cy: 32 ( 50.0% )
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+class RetireControlUnitStatistics : public View {
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram RetiredPerCycle;
+
+ unsigned NumRetired;
+ unsigned NumCycles;
+ unsigned TotalROBEntries;
+ unsigned EntriesInUse;
+ unsigned MaxUsedEntries;
+ unsigned SumOfUsedEntries;
+
+public:
+ RetireControlUnitStatistics(const MCSchedModel &SM);
+
+ void onEvent(const HWInstructionEvent &Event) override;
+ void onCycleEnd() override;
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override {
+ return "RetireControlUnitStatistics";
+ }
+ bool isSerializable() const override { return false; }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp
new file mode 100644
index 0000000000..06caeda344
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -0,0 +1,178 @@
+//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulerStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulerStatistics.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
+ : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
+ NumCycles(0), MostRecentLoadDispatched(~0U),
+ MostRecentStoreDispatched(~0U),
+ Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ LQResourceID = EPI.LoadQueueID;
+ SQResourceID = EPI.StoreQueueID;
+ }
+}
+
+// FIXME: This implementation works under the assumption that load/store queue
+// entries are reserved at 'instruction dispatched' stage, and released at
+// 'instruction executed' stage. This currently matches the behavior of LSUnit.
+//
+// The current design minimizes the number of events generated by the
+// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method
+// `onEvent`. However, it introduces a subtle dependency between this view and
+// how the LSUnit works.
+//
+// In future we should add a new "memory queue" event type, so that we stop
+// making assumptions on how LSUnit internally works (See PR39828).
+void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Issued) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ NumIssued += Inst.getDesc().NumMicroOps;
+ } else if (Event.Type == HWInstructionEvent::Dispatched) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (LQResourceID && Inst.getMayLoad() &&
+ MostRecentLoadDispatched != Index) {
+ Usage[LQResourceID].SlotsInUse++;
+ MostRecentLoadDispatched = Index;
+ }
+ if (SQResourceID && Inst.getMayStore() &&
+ MostRecentStoreDispatched != Index) {
+ Usage[SQResourceID].SlotsInUse++;
+ MostRecentStoreDispatched = Index;
+ }
+ } else if (Event.Type == HWInstructionEvent::Executed) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ if (LQResourceID && Inst.getMayLoad()) {
+ assert(Usage[LQResourceID].SlotsInUse);
+ Usage[LQResourceID].SlotsInUse--;
+ }
+ if (SQResourceID && Inst.getMayStore()) {
+ assert(Usage[SQResourceID].SlotsInUse);
+ Usage[SQResourceID].SlotsInUse--;
+ }
+ }
+}
+
+void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse++;
+ }
+}
+
+void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse--;
+ }
+}
+
+void SchedulerStatistics::updateHistograms() {
+ for (BufferUsage &BU : Usage) {
+ BU.CumulativeNumUsedSlots += BU.SlotsInUse;
+ BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
+ }
+
+ IssueWidthPerCycle[NumIssued]++;
+ NumIssued = 0;
+}
+
+void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
+ OS << "\n\nSchedulers - "
+ << "number of cycles where we saw N micro opcodes issued:\n";
+ OS << "[# issued], [# cycles]\n";
+
+ bool HasColors = OS.has_colors();
+ const auto It =
+ std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end());
+ for (const std::pair<const unsigned, unsigned> &Entry : IssueWidthPerCycle) {
+ unsigned NumIssued = Entry.first;
+ if (NumIssued == It->first && HasColors)
+ OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ unsigned IPC = Entry.second;
+ OS << " " << NumIssued << ", " << IPC << " ("
+ << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
+ if (HasColors)
+ OS.resetColor();
+ }
+}
+
+void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
+ assert(NumCycles && "Unexpected number of cycles!");
+
+ OS << "\nScheduler's queue usage:\n";
+ if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
+ OS << "No scheduler resources used.\n";
+ return;
+ }
+
+ OS << "[1] Resource name.\n"
+ << "[2] Average number of used buffer entries.\n"
+ << "[3] Maximum number of used buffer entries.\n"
+ << "[4] Total number of buffer entries.\n\n"
+ << " [1] [2] [3] [4]\n";
+
+ formatted_raw_ostream FOS(OS);
+ bool HasColors = FOS.has_colors();
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ if (ProcResource.BufferSize <= 0)
+ continue;
+
+ const BufferUsage &BU = Usage[I];
+ double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
+ double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
+ unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
+ unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;
+
+ FOS << ProcResource.Name;
+ FOS.PadToColumn(17);
+ if (HasColors && NormalizedAvg >= NormalizedThreshold)
+ FOS.changeColor(raw_ostream::YELLOW, true, false);
+ FOS << NormalizedAvg;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(28);
+ if (HasColors &&
+ BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
+ FOS.changeColor(raw_ostream::RED, true, false);
+ FOS << BU.MaxUsedSlots;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(39);
+ FOS << ProcResource.BufferSize << '\n';
+ }
+
+ FOS.flush();
+}
+
+void SchedulerStatistics::printView(raw_ostream &OS) const {
+ printSchedulerStats(OS);
+ printSchedulerUsage(OS);
+}
+
+} // namespace mca
+} // namespace llvm
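
The queue-usage summary printed by printSchedulerUsage above reduces to two numbers per
buffered resource: an average occupancy (cumulative used slots divided by simulated
cycles) and a warning threshold at 4/5 of the buffer size. A minimal sketch with invented
figures:

#include <cstdio>

int main() {
  // Hypothetical scheduler buffer of 18 entries observed over 200 cycles.
  const int BufferSize = 18;
  const unsigned NumCycles = 200;
  const unsigned long long CumulativeUsedSlots = 3000; // sum of per-cycle usage

  double AvgUsage = (double)CumulativeUsedSlots / NumCycles; // 15.0
  double AlmostFull = (double)(BufferSize * 4) / 5;          // 14.4
  std::printf("avg %.1f of %d entries%s\n", AvgUsage, BufferSize,
              AvgUsage >= AlmostFull ? " (nearly full)" : "");
  return 0;
}
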
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h
new file mode 100644
index 0000000000..9d2f71c13e
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -0,0 +1,97 @@
+//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a
+/// View that listens to instruction issue events in order to print general
+/// statistics related to the hardware schedulers.
+///
+/// Example:
+/// ========
+///
+/// Schedulers - number of cycles where we saw N micro opcodes issued:
+/// [# issued], [# cycles]
+/// 0, 6 (2.9%)
+/// 1, 106 (50.7%)
+/// 2, 97 (46.4%)
+///
+/// Scheduler's queue usage:
+/// [1] Resource name.
+/// [2] Average number of used buffer entries.
+/// [3] Maximum number of used buffer entries.
+/// [4] Total number of buffer entries.
+///
+/// [1] [2] [3] [4]
+/// JALU01 0 0 20
+/// JFPU01 15 18 18
+/// JLSAGU 0 0 12
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStatistics final : public View {
+ const llvm::MCSchedModel &SM;
+ unsigned LQResourceID;
+ unsigned SQResourceID;
+
+ unsigned NumIssued;
+ unsigned NumCycles;
+
+ unsigned MostRecentLoadDispatched;
+ unsigned MostRecentStoreDispatched;
+
+ // Tracks the usage of a scheduler's queue.
+ struct BufferUsage {
+ unsigned SlotsInUse;
+ unsigned MaxUsedSlots;
+ uint64_t CumulativeNumUsedSlots;
+ };
+
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram IssueWidthPerCycle;
+
+ std::vector<BufferUsage> Usage;
+
+ void updateHistograms();
+ void printSchedulerStats(llvm::raw_ostream &OS) const;
+ void printSchedulerUsage(llvm::raw_ostream &OS) const;
+
+public:
+ SchedulerStatistics(const llvm::MCSubtargetInfo &STI);
+ void onEvent(const HWInstructionEvent &Event) override;
+ void onCycleBegin() override { NumCycles++; }
+ void onCycleEnd() override { updateHistograms(); }
+
+ // Increases by one the number of used scheduler queue slots of every
+ // buffered resource in the Buffers set.
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ // Decreases by one the number of used scheduler queue slots of every
+ // buffered resource in the Buffers set.
+ void onReleasedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SchedulerStatistics"; }
+ bool isSerializable() const override { return false; }
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp
new file mode 100644
index 0000000000..bf258b4c26
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp
@@ -0,0 +1,113 @@
+//===--------------------- SummaryView.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the SummaryView to print
+/// the report information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SummaryView.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
+ unsigned Width)
+ : SM(Model), Source(S), DispatchWidth(Width ? Width : Model.IssueWidth),
+ LastInstructionIdx(0), TotalCycles(0), NumMicroOps(0),
+ ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+ ProcResourceMasks(Model.getNumProcResourceKinds()),
+ ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResourceMasks);
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
+ ResIdx2ProcResID[Index] = I;
+ }
+}
+
+void SummaryView::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched)
+ LastInstructionIdx = Event.IR.getSourceIndex();
+
+ // We are only interested in the "instruction retired" events generated by
+ // the retire stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Retired ||
+ Event.IR.getSourceIndex() >= Source.size())
+ return;
+
+ // Update the cumulative number of resource cycles based on the processor
+ // resource usage information available from the instruction descriptor. We
+ // need to compute the cumulative number of resource cycles for every
+ // processor resource which is consumed by an instruction of the block.
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const InstrDesc &Desc = Inst.getDesc();
+ NumMicroOps += Desc.NumMicroOps;
+ for (const std::pair<uint64_t, ResourceUsage> &RU : Desc.Resources) {
+ if (RU.second.size()) {
+ unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)];
+ ProcResourceUsage[ProcResID] += RU.second.size();
+ }
+ }
+}
+
+void SummaryView::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ DisplayValues DV;
+
+ collectData(DV);
+ TempStream << "Iterations: " << DV.Iterations;
+ TempStream << "\nInstructions: " << DV.TotalInstructions;
+ TempStream << "\nTotal Cycles: " << DV.TotalCycles;
+ TempStream << "\nTotal uOps: " << DV.TotalUOps << '\n';
+ TempStream << "\nDispatch Width: " << DV.DispatchWidth;
+ TempStream << "\nuOps Per Cycle: "
+ << format("%.2f", floor((DV.UOpsPerCycle * 100) + 0.5) / 100);
+ TempStream << "\nIPC: "
+ << format("%.2f", floor((DV.IPC * 100) + 0.5) / 100);
+ TempStream << "\nBlock RThroughput: "
+ << format("%.1f", floor((DV.BlockRThroughput * 10) + 0.5) / 10)
+ << '\n';
+ TempStream.flush();
+ OS << Buffer;
+}
+
+void SummaryView::collectData(DisplayValues &DV) const {
+ DV.Instructions = Source.size();
+ DV.Iterations = (LastInstructionIdx / DV.Instructions) + 1;
+ DV.TotalInstructions = DV.Instructions * DV.Iterations;
+ DV.TotalCycles = TotalCycles;
+ DV.DispatchWidth = DispatchWidth;
+ DV.TotalUOps = NumMicroOps * DV.Iterations;
+ DV.UOpsPerCycle = (double)DV.TotalUOps / TotalCycles;
+ DV.IPC = (double)DV.TotalInstructions / TotalCycles;
+ DV.BlockRThroughput = computeBlockRThroughput(SM, DispatchWidth, NumMicroOps,
+ ProcResourceUsage);
+}
+
+json::Value SummaryView::toJSON() const {
+ DisplayValues DV;
+ collectData(DV);
+ json::Object JO({{"Iterations", DV.Iterations},
+ {"Instructions", DV.TotalInstructions},
+ {"TotalCycles", DV.TotalCycles},
+ {"TotaluOps", DV.TotalUOps},
+ {"DispatchWidth", DV.DispatchWidth},
+ {"uOpsPerCycle", DV.UOpsPerCycle},
+ {"IPC", DV.IPC},
+ {"BlockRThroughput", DV.BlockRThroughput}});
+ return JO;
+}
+} // namespace mca
+} // namespace llvm
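
The arithmetic in SummaryView::collectData above is easy to check by hand. A standalone
sketch with made-up numbers (not taken from any real run): iterations are inferred from
the last observed source index, and IPC and uOps-per-cycle follow directly.

#include <cstdio>

int main() {
  // Hypothetical run: a 3-instruction block, last dispatched index 899,
  // 610 total cycles, 4 micro-ops per block.
  const unsigned Instructions = 3, LastInstructionIdx = 899;
  const unsigned TotalCycles = 610, NumMicroOps = 4;

  unsigned Iterations = LastInstructionIdx / Instructions + 1; // 300
  unsigned TotalInstructions = Instructions * Iterations;      // 900
  unsigned TotalUOps = NumMicroOps * Iterations;               // 1200
  double IPC = (double)TotalInstructions / TotalCycles;        // ~1.48
  double UOpsPerCycle = (double)TotalUOps / TotalCycles;       // ~1.97

  std::printf("Iterations: %u\nIPC: %.2f\nuOps Per Cycle: %.2f\n", Iterations,
              IPC, UOpsPerCycle);
  return 0;
}
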
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h
new file mode 100644
index 0000000000..21f3fad23c
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h
@@ -0,0 +1,90 @@
+//===--------------------- SummaryView.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the summary view.
+///
+/// The goal of the summary view is to give a very quick overview of the
+/// performance throughput. Below is an example of summary view:
+///
+///
+/// Iterations: 300
+/// Instructions: 900
+/// Total Cycles: 610
+/// Dispatch Width: 2
+/// IPC: 1.48
+/// Block RThroughput: 2.0
+///
+/// The summary view collects a few performance numbers. The two main
+/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// A view that collects and prints a few performance numbers.
+class SummaryView : public View {
+ const llvm::MCSchedModel &SM;
+ llvm::ArrayRef<llvm::MCInst> Source;
+ const unsigned DispatchWidth;
+ unsigned LastInstructionIdx;
+ unsigned TotalCycles;
+ // The total number of micro opcodes contributed by a block of instructions.
+ unsigned NumMicroOps;
+
+ struct DisplayValues {
+ unsigned Instructions;
+ unsigned Iterations;
+ unsigned TotalInstructions;
+ unsigned TotalCycles;
+ unsigned DispatchWidth;
+ unsigned TotalUOps;
+ double IPC;
+ double UOpsPerCycle;
+ double BlockRThroughput;
+ };
+
+ // For each processor resource, this vector stores the cumulative number of
+ // resource cycles consumed by the analyzed code block.
+ llvm::SmallVector<unsigned, 8> ProcResourceUsage;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector is used to correlate processor resource IDs with
+ // processor resource masks. There is exactly one element for each processor
+ // resource declared by the scheduling model.
+ llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
+
+ // Used to map resource indices to actual processor resource IDs.
+ llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
+
+ /// Compute the data we want to print out in the object DV.
+ void collectData(DisplayValues &DV) const;
+
+public:
+ SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
+ unsigned Width);
+
+ void onCycleEnd() override { ++TotalCycles; }
+ void onEvent(const HWInstructionEvent &Event) override;
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SummaryView"; }
+ json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp
new file mode 100644
index 0000000000..5c05edbdea
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp
@@ -0,0 +1,328 @@
+//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \brief
+///
+/// This file implements the TimelineView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/TimelineView.h"
+#include <numeric>
+
+namespace llvm {
+namespace mca {
+
+TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles)
+ : InstructionView(sti, Printer, S), CurrentCycle(0),
+ MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles),
+ LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) {
+ unsigned NumInstructions = getSource().size();
+ assert(Iterations && "Invalid number of iterations specified!");
+ NumInstructions *= Iterations;
+ Timeline.resize(NumInstructions);
+ TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
+ std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
+
+ WaitTimeEntry NullWTEntry = {0, 0, 0};
+ std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
+
+ std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
+ /* unknown buffer size */ -1};
+ std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
+}
+
+void TimelineView::onReservedBuffers(const InstRef &IR,
+ ArrayRef<unsigned> Buffers) {
+ if (IR.getSourceIndex() >= getSource().size())
+ return;
+
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+ std::pair<unsigned, int> BufferInfo = {0, -1};
+ for (const unsigned Buffer : Buffers) {
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
+ if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
+ BufferInfo.first = Buffer;
+ BufferInfo.second = MCDesc.BufferSize;
+ }
+ }
+
+ UsedBuffer[IR.getSourceIndex()] = BufferInfo;
+}
+
+void TimelineView::onEvent(const HWInstructionEvent &Event) {
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (Index >= Timeline.size())
+ return;
+
+ switch (Event.Type) {
+ case HWInstructionEvent::Retired: {
+ TimelineViewEntry &TVEntry = Timeline[Index];
+ if (CurrentCycle < MaxCycle)
+ TVEntry.CycleRetired = CurrentCycle;
+
+ // Update the WaitTime entry which corresponds to this Index.
+ assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+ unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
+ WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()];
+ WTEntry.CyclesSpentInSchedulerQueue +=
+ TVEntry.CycleIssued - CycleDispatched;
+ assert(CycleDispatched <= TVEntry.CycleReady &&
+ "Instruction cannot be ready if it hasn't been dispatched yet!");
+ WTEntry.CyclesSpentInSQWhileReady +=
+ TVEntry.CycleIssued - TVEntry.CycleReady;
+ if (CurrentCycle > TVEntry.CycleExecuted) {
+ WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
+ (CurrentCycle - 1) - TVEntry.CycleExecuted;
+ }
+ break;
+ }
+ case HWInstructionEvent::Ready:
+ Timeline[Index].CycleReady = CurrentCycle;
+ break;
+ case HWInstructionEvent::Issued:
+ Timeline[Index].CycleIssued = CurrentCycle;
+ break;
+ case HWInstructionEvent::Executed:
+ Timeline[Index].CycleExecuted = CurrentCycle;
+ break;
+ case HWInstructionEvent::Dispatched:
+ // There may be multiple dispatch events. Microcoded instructions that are
+ // expanded into multiple uOps may require multiple dispatch cycles. Here,
+ // we want to capture the first dispatch cycle.
+ if (Timeline[Index].CycleDispatched == -1)
+ Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
+ break;
+ default:
+ return;
+ }
+ if (CurrentCycle < MaxCycle)
+ LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
+ unsigned Executions, int BufferSize) {
+ if (CumulativeCycles && BufferSize < 0)
+ return raw_ostream::MAGENTA;
+ unsigned Size = static_cast<unsigned>(BufferSize);
+ if (CumulativeCycles >= Size * Executions)
+ return raw_ostream::RED;
+ if ((CumulativeCycles * 2) >= Size * Executions)
+ return raw_ostream::YELLOW;
+ return raw_ostream::SAVEDCOLOR;
+}
+
+static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
+ unsigned Executions, int BufferSize) {
+ if (!OS.has_colors())
+ return;
+
+ raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
+ if (Color == raw_ostream::SAVEDCOLOR) {
+ OS.resetColor();
+ return;
+ }
+ OS.changeColor(Color, /* bold */ true, /* BG */ false);
+}
+
+void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
+ const WaitTimeEntry &Entry,
+ unsigned SourceIndex,
+ unsigned Executions) const {
+ bool PrintingTotals = SourceIndex == getSource().size();
+ unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;
+
+ if (!PrintingTotals)
+ OS << SourceIndex << '.';
+
+ OS.PadToColumn(7);
+
+ double AverageTime1, AverageTime2, AverageTime3;
+ AverageTime1 =
+ (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
+ AverageTime2 =
+ (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
+ AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
+ CumulativeExecutions;
+
+ OS << Executions;
+ OS.PadToColumn(13);
+
+ int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
+ BufferSize);
+ OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
+ OS.PadToColumn(20);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
+ BufferSize);
+ OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
+ OS.PadToColumn(27);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
+ CumulativeExecutions,
+ getSubTargetInfo().getSchedModel().MicroOpBufferSize);
+ OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
+
+ if (OS.has_colors())
+ OS.resetColor();
+ OS.PadToColumn(34);
+}
+
+void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
+ std::string Header =
+ "\n\nAverage Wait times (based on the timeline view):\n"
+ "[0]: Executions\n"
+ "[1]: Average time spent waiting in a scheduler's queue\n"
+ "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+ "[3]: Average time elapsed from WB until retire stage\n\n"
+ " [0] [1] [2] [3]\n";
+ OS << Header;
+ formatted_raw_ostream FOS(OS);
+ unsigned Executions = Timeline.size() / getSource().size();
+ unsigned IID = 0;
+ for (const MCInst &Inst : getSource()) {
+ printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
+ FOS << " " << printInstructionString(Inst) << '\n';
+ FOS.flush();
+ ++IID;
+ }
+
+ // If the timeline contains more than one instruction,
+ // let's also print global averages.
+ if (getSource().size() != 1) {
+ WaitTimeEntry TotalWaitTime = std::accumulate(
+ WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
+ [](const WaitTimeEntry &A, const WaitTimeEntry &B) {
+ return WaitTimeEntry{
+ A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,
+ A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,
+ A.CyclesSpentAfterWBAndBeforeRetire +
+ B.CyclesSpentAfterWBAndBeforeRetire};
+ });
+ printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
+ FOS << " "
+ << "<total>" << '\n';
+ FOS.flush();
+ }
+}
+
+void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
+ const TimelineViewEntry &Entry,
+ unsigned Iteration,
+ unsigned SourceIndex) const {
+ if (Iteration == 0 && SourceIndex == 0)
+ OS << '\n';
+ OS << '[' << Iteration << ',' << SourceIndex << ']';
+ OS.PadToColumn(10);
+ assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+ unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
+ for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
+ OS << ((I % 5 == 0) ? '.' : ' ');
+ OS << TimelineView::DisplayChar::Dispatched;
+ if (CycleDispatched != Entry.CycleExecuted) {
+ // Zero latency instructions have the same value for CycleDispatched,
+ // CycleIssued and CycleExecuted.
+ for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
+ OS << TimelineView::DisplayChar::Waiting;
+ if (Entry.CycleIssued == Entry.CycleExecuted)
+ OS << TimelineView::DisplayChar::Executed;
+ else {
+ if (CycleDispatched != Entry.CycleIssued)
+ OS << TimelineView::DisplayChar::Executing;
+ for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
+ ++I)
+ OS << TimelineView::DisplayChar::Executing;
+ OS << TimelineView::DisplayChar::Executed;
+ }
+ }
+
+ for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
+ OS << TimelineView::DisplayChar::RetireLag;
+ if (Entry.CycleExecuted < Entry.CycleRetired)
+ OS << TimelineView::DisplayChar::Retired;
+
+ // Skip other columns.
+ for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
+ OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
+}
+
+static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
+ OS << "\n\nTimeline view:\n";
+ if (Cycles >= 10) {
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << ' ';
+ else
+ OS << I % 10;
+ }
+ OS << '\n';
+ }
+
+ OS << "Index";
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << I % 10;
+ else
+ OS << ' ';
+ }
+ OS << '\n';
+}
+
+void TimelineView::printTimeline(raw_ostream &OS) const {
+ formatted_raw_ostream FOS(OS);
+ printTimelineHeader(FOS, LastCycle);
+ FOS.flush();
+
+ unsigned IID = 0;
+ ArrayRef<llvm::MCInst> Source = getSource();
+ const unsigned Iterations = Timeline.size() / Source.size();
+ for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
+ for (const MCInst &Inst : Source) {
+ const TimelineViewEntry &Entry = Timeline[IID];
+ // When an instruction is retired after timeline-max-cycles,
+ // its CycleRetired is left at 0. However, it's possible for
+ // a 0 latency instruction to be retired during cycle 0 and we
+ // don't want to early exit in that case. The CycleExecuted
+ // attribute is set correctly whether or not it is greater
+ // than timeline-max-cycles so we can use that to ensure
+ // we don't early exit because of a 0 latency instruction.
+ if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) {
+ FOS << "Truncated display due to cycle limit\n";
+ return;
+ }
+
+ unsigned SourceIndex = IID % Source.size();
+ printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
+ FOS << " " << printInstructionString(Inst) << '\n';
+ FOS.flush();
+
+ ++IID;
+ }
+ }
+}
+
+json::Value TimelineView::toJSON() const {
+ json::Array TimelineInfo;
+
+ for (const TimelineViewEntry &TLE : Timeline) {
+ TimelineInfo.push_back(
+ json::Object({{"CycleDispatched", TLE.CycleDispatched},
+ {"CycleReady", TLE.CycleReady},
+ {"CycleIssued", TLE.CycleIssued},
+ {"CycleExecuted", TLE.CycleExecuted},
+ {"CycleRetired", TLE.CycleRetired}}));
+ }
+ return json::Object({{"TimelineInfo", std::move(TimelineInfo)}});
+}
+} // namespace mca
+} // namespace llvm
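
The wait-time bookkeeping in TimelineView::onEvent above reduces, for each retired
instruction, to three differences between recorded cycles: issue minus dispatch (time in
the scheduler's queue), issue minus ready (time waiting while ready), and retire minus
one minus execute (post write-back lag). A standalone sketch with one invented entry:

#include <cstdio>

struct Entry {
  int CycleDispatched;
  unsigned CycleReady, CycleIssued, CycleExecuted, CycleRetired;
};

int main() {
  // Invented entry: dispatched at 2, ready at 5, issued at 5,
  // executed (write-back) at 8, retired at 10.
  Entry E = {2, 5, 5, 8, 10};

  unsigned InQueue = E.CycleIssued - (unsigned)E.CycleDispatched; // 3
  unsigned ReadyButWaiting = E.CycleIssued - E.CycleReady;        // 0
  unsigned RetireLag = E.CycleRetired > E.CycleExecuted
                           ? (E.CycleRetired - 1) - E.CycleExecuted
                           : 0;                                   // 1
  std::printf("queue: %u, ready-wait: %u, retire lag: %u\n", InQueue,
              ReadyButWaiting, RetireLag);
  return 0;
}
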
diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h
new file mode 100644
index 0000000000..81be8244b7
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h
@@ -0,0 +1,188 @@
+//===--------------------- TimelineView.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \brief
+///
+/// This file implements a timeline view for the llvm-mca tool.
+///
+/// Class TimelineView observes events generated by the pipeline. For every
+/// instruction executed by the pipeline, it stores information related to
+/// state transition. It then plots that information in the form of a table
+/// as reported by the example below:
+///
+/// Timeline view:
+/// 0123456
+/// Index 0123456789
+///
+/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1
+/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2
+/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5
+/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3
+/// [0,4] . D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4
+/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6
+///
+/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1
+/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2
+/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5
+/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3
+/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4
+/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6
+///
+/// There is an entry for every instruction in the input assembly sequence.
+/// The first field is a pair of numbers obtained from the instruction index.
+/// The first element of the pair is the iteration index, while the second
+/// element of the pair is a sequence number (i.e. a position in the assembly
+/// sequence).
+/// The second field of the table is the actual timeline information; each
+/// column is the information related to a specific cycle of execution.
+/// The timeline of an instruction is described by a sequence of character
+/// where each character represents the instruction state at a specific cycle.
+///
+/// Possible instruction states are:
+/// D: Instruction Dispatched
+/// e: Instruction Executing
+/// E: Instruction Executed (write-back stage)
+/// R: Instruction retired
+/// =: Instruction waiting in the Scheduler's queue
+/// -: Instruction executed, waiting to retire in order.
+///
+/// dots ('.') and empty spaces are cycles where the instruction is not
+/// in-flight.
+///
+/// The last column is the assembly instruction associated to the entry.
+///
+/// Based on the timeline view information from the example, instruction 0
+/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3.
+/// Instruction [0,1] was also dispatched at cycle 0, and it retired in
+/// the same cycle as instruction [0,0].
+/// Instruction [0,4] was dispatched at cycle 2. However, it had to
+/// wait for two cycles before being issued. That is because operands
+/// became ready only at cycle 5.
+///
+/// This view helps to further understand bottlenecks and the impact of
+/// resource pressure on the code.
+///
+/// To better understand why instructions had to wait for multiple cycles in
+/// the scheduler's queue, class TimelineView also reports extra timing info
+/// in another table named "Average Wait times" (see example below).
+///
+///
+/// Average Wait times (based on the timeline view):
+/// [0]: Executions
+/// [1]: Average time spent waiting in a scheduler's queue
+/// [2]: Average time spent waiting in a scheduler's queue while ready
+/// [3]: Average time elapsed from WB until retire stage
+///
+/// [0] [1] [2] [3]
+/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1
+/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2
+/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5
+/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3
+/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4
+/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6
+/// 2 2.4 0.6 1.6 <total>
+///
+/// By comparing column [2] with column [1], we get an idea about how many
+/// cycles were spent in the scheduler's queue due to data dependencies.
+///
+/// In this example, instruction 5 spent an average of ~6 cycles in the
+/// scheduler's queue. As soon as operands became ready, the instruction
+/// was immediately issued to the pipeline(s).
+/// That is expected because instruction 5 cannot transition to the "ready"
+/// state until %xmm4 is written by instruction 4.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// This class listens to instruction state transition events
+/// in order to construct a timeline information.
+///
+/// For every instruction executed by the Pipeline, this class constructs
+/// a TimelineViewEntry object. TimelineViewEntry objects are then used
+/// to print the timeline information, as well as the "average wait times"
+/// for every instruction in the input assembly sequence.
+class TimelineView : public InstructionView {
+ unsigned CurrentCycle;
+ unsigned MaxCycle;
+ unsigned LastCycle;
+
+ struct TimelineViewEntry {
+ int CycleDispatched; // A negative value is an "invalid cycle".
+ unsigned CycleReady;
+ unsigned CycleIssued;
+ unsigned CycleExecuted;
+ unsigned CycleRetired;
+ };
+ std::vector<TimelineViewEntry> Timeline;
+
+ struct WaitTimeEntry {
+ unsigned CyclesSpentInSchedulerQueue;
+ unsigned CyclesSpentInSQWhileReady;
+ unsigned CyclesSpentAfterWBAndBeforeRetire;
+ };
+ std::vector<WaitTimeEntry> WaitTime;
+
+ // This field is used to map instructions to buffered resources.
+ // Elements of this vector are <resourceID, BufferSize> pairs.
+ std::vector<std::pair<unsigned, int>> UsedBuffer;
+
+ void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
+ const TimelineViewEntry &E, unsigned Iteration,
+ unsigned SourceIndex) const;
+ void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
+ const WaitTimeEntry &E, unsigned Index,
+ unsigned Executions) const;
+
+ // Display characters for the TimelineView report output.
+ struct DisplayChar {
+ static const char Dispatched = 'D';
+ static const char Executed = 'E';
+ static const char Retired = 'R';
+ static const char Waiting = '='; // Instruction is waiting in the scheduler.
+ static const char Executing = 'e';
+ static const char RetireLag = '-'; // The instruction is waiting to retire.
+ };
+
+public:
+ TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles);
+
+ // Event handlers.
+ void onCycleEnd() override { ++CurrentCycle; }
+ void onEvent(const HWInstructionEvent &Event) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ // print functionalities.
+ void printTimeline(llvm::raw_ostream &OS) const;
+ void printAverageWaitTimes(llvm::raw_ostream &OS) const;
+ void printView(llvm::raw_ostream &OS) const override {
+ printTimeline(OS);
+ printAverageWaitTimes(OS);
+ }
+ StringRef getNameAsString() const override { return "TimelineView"; }
+ json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp b/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp
new file mode 100644
index 0000000000..73c341891a
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp
@@ -0,0 +1,761 @@
+//===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility is a simple driver that allows static performance analysis on
+// machine code similarly to how IACA (Intel Architecture Code Analyzer) works.
+//
+// llvm-mca [options] <file-name>
+// -march <type>
+// -mcpu <cpu>
+// -o <file>
+//
+// The target defaults to the host target.
+// The cpu defaults to the 'native' host cpu.
+// The output defaults to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegion.h"
+#include "CodeRegionGenerator.h"
+#include "PipelinePrinter.h"
+#include "Views/BottleneckAnalysis.h"
+#include "Views/DispatchStatistics.h"
+#include "Views/InstructionInfoView.h"
+#include "Views/RegisterFileStatistics.h"
+#include "Views/ResourcePressureView.h"
+#include "Views/RetireControlUnitStatistics.h"
+#include "Views/SchedulerStatistics.h"
+#include "Views/SummaryView.h"
+#include "Views/TimelineView.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/MCA/CodeEmitter.h"
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/InstructionTables.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+
+using namespace llvm;
+
+static mc::RegisterMCTargetOptionsFlags MOF;
+
+static cl::OptionCategory ToolOptions("Tool Options");
+static cl::OptionCategory ViewOptions("View Options");
+
+static cl::opt<std::string> InputFilename(cl::Positional,
+ cl::desc("<input file>"),
+ cl::cat(ToolOptions), cl::init("-"));
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
+ cl::init("-"), cl::cat(ToolOptions),
+ cl::value_desc("filename"));
+
+static cl::opt<std::string>
+ ArchName("march",
+ cl::desc("Target architecture. "
+ "See -version for available targets"),
+ cl::cat(ToolOptions));
+
+static cl::opt<std::string>
+ TripleName("mtriple",
+ cl::desc("Target triple. See -version for available targets"),
+ cl::cat(ToolOptions));
+
+static cl::opt<std::string>
+ MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native"));
+
+static cl::list<std::string>
+ MATTRS("mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."), cl::cat(ToolOptions));
+
+static cl::opt<bool> PrintJson("json",
+ cl::desc("Print the output in json format"),
+ cl::cat(ToolOptions), cl::init(false));
+
+static cl::opt<int>
+ OutputAsmVariant("output-asm-variant",
+ cl::desc("Syntax variant to use for output printing"),
+ cl::cat(ToolOptions), cl::init(-1));
+
+static cl::opt<bool>
+ PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false),
+ cl::desc("Prefer hex format when printing immediate values"));
+
+static cl::opt<unsigned> Iterations("iterations",
+ cl::desc("Number of iterations to run"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ DispatchWidth("dispatch", cl::desc("Override the processor dispatch width"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ RegisterFileSize("register-file-size",
+ cl::desc("Maximum number of physical registers which can "
+ "be used for register mappings"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ MicroOpQueue("micro-op-queue-size", cl::Hidden,
+ cl::desc("Number of entries in the micro-op queue"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ DecoderThroughput("decoder-throughput", cl::Hidden,
+ cl::desc("Maximum throughput from the decoders "
+ "(instructions per cycle)"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<bool>
+ PrintRegisterFileStats("register-file-stats",
+ cl::desc("Print register file statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> PrintDispatchStats("dispatch-stats",
+ cl::desc("Print dispatch statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool>
+ PrintSummaryView("summary-view", cl::Hidden,
+ cl::desc("Print summary view (enabled by default)"),
+ cl::cat(ViewOptions), cl::init(true));
+
+static cl::opt<bool> PrintSchedulerStats("scheduler-stats",
+ cl::desc("Print scheduler statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool>
+ PrintRetireStats("retire-stats",
+ cl::desc("Print retire control unit statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> PrintResourcePressureView(
+ "resource-pressure",
+ cl::desc("Print the resource pressure view (enabled by default)"),
+ cl::cat(ViewOptions), cl::init(true));
+
+static cl::opt<bool> PrintTimelineView("timeline",
+ cl::desc("Print the timeline view"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<unsigned> TimelineMaxIterations(
+ "timeline-max-iterations",
+ cl::desc("Maximum number of iterations to print in timeline view"),
+ cl::cat(ViewOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ TimelineMaxCycles("timeline-max-cycles",
+ cl::desc("Maximum number of cycles in the timeline view, "
+ "or 0 for unlimited. Defaults to 80 cycles"),
+ cl::cat(ViewOptions), cl::init(80));
+
+static cl::opt<bool>
+ AssumeNoAlias("noalias",
+ cl::desc("If set, assume that loads and stores do not alias"),
+ cl::cat(ToolOptions), cl::init(true));
+
+static cl::opt<unsigned> LoadQueueSize("lqueue",
+ cl::desc("Size of the load queue"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned> StoreQueueSize("squeue",
+ cl::desc("Size of the store queue"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<bool>
+ PrintInstructionTables("instruction-tables",
+ cl::desc("Print instruction tables"),
+ cl::cat(ToolOptions), cl::init(false));
+
+static cl::opt<bool> PrintInstructionInfoView(
+ "instruction-info",
+ cl::desc("Print the instruction info view (enabled by default)"),
+ cl::cat(ViewOptions), cl::init(true));
+
+static cl::opt<bool> EnableAllStats("all-stats",
+ cl::desc("Print all hardware statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool>
+ EnableAllViews("all-views",
+ cl::desc("Print all views including hardware statistics"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> EnableBottleneckAnalysis(
+ "bottleneck-analysis",
+ cl::desc("Enable bottleneck analysis (disabled by default)"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> ShowEncoding(
+ "show-encoding",
+ cl::desc("Print encoding information in the instruction info view"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> ShowBarriers(
+ "show-barriers",
+ cl::desc("Print memory barrier information in the instruction info view"),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> DisableCustomBehaviour(
+ "disable-cb",
+ cl::desc(
+ "Disable custom behaviour (use the default class which does nothing)."),
+ cl::cat(ViewOptions), cl::init(false));
+
+static cl::opt<bool> DisableInstrumentManager(
+ "disable-im",
+ cl::desc("Disable instrumentation manager (use the default class which "
+ "ignores instruments.)."),
+ cl::cat(ViewOptions), cl::init(false));
+
+namespace {
+
+const Target *getTarget(const char *ProgName) {
+ if (TripleName.empty())
+ TripleName = Triple::normalize(sys::getDefaultTargetTriple());
+ Triple TheTriple(TripleName);
+
+ // Get the target specific parser.
+ std::string Error;
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
+ if (!TheTarget) {
+ errs() << ProgName << ": " << Error;
+ return nullptr;
+ }
+
+ // Update TripleName with the updated triple from the target lookup.
+ TripleName = TheTriple.str();
+
+ // Return the found target.
+ return TheTarget;
+}
+
+ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
+ if (OutputFilename == "")
+ OutputFilename = "-";
+ std::error_code EC;
+ auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
+ sys::fs::OF_TextWithCRLF);
+ if (!EC)
+ return std::move(Out);
+ return EC;
+}
+} // end of anonymous namespace
+
+static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
+ if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition())
+ O = Default.getValue();
+}
+
+static void processViewOptions(bool IsOutOfOrder) {
+ if (!EnableAllViews.getNumOccurrences() &&
+ !EnableAllStats.getNumOccurrences())
+ return;
+
+ if (EnableAllViews.getNumOccurrences()) {
+ processOptionImpl(PrintSummaryView, EnableAllViews);
+ if (IsOutOfOrder)
+ processOptionImpl(EnableBottleneckAnalysis, EnableAllViews);
+ processOptionImpl(PrintResourcePressureView, EnableAllViews);
+ processOptionImpl(PrintTimelineView, EnableAllViews);
+ processOptionImpl(PrintInstructionInfoView, EnableAllViews);
+ }
+
+ const cl::opt<bool> &Default =
+ EnableAllViews.getPosition() < EnableAllStats.getPosition()
+ ? EnableAllStats
+ : EnableAllViews;
+ processOptionImpl(PrintRegisterFileStats, Default);
+ processOptionImpl(PrintDispatchStats, Default);
+ processOptionImpl(PrintSchedulerStats, Default);
+ if (IsOutOfOrder)
+ processOptionImpl(PrintRetireStats, Default);
+}
+
+// Returns true on success.
+static bool runPipeline(mca::Pipeline &P) {
+ // Handle pipeline errors here.
+ Expected<unsigned> Cycles = P.run();
+ if (!Cycles) {
+ WithColor::error() << toString(Cycles.takeError());
+ return false;
+ }
+ return true;
+}
+
+int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
+
+ // Initialize targets and assembly parsers.
+ InitializeAllTargetInfos();
+ InitializeAllTargetMCs();
+ InitializeAllAsmParsers();
+ InitializeAllTargetMCAs();
+
+ // Register the Target and CPU printer for --version.
+ cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);
+
+ // Enable printing of available targets when flag --version is specified.
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
+ cl::HideUnrelatedOptions({&ToolOptions, &ViewOptions});
+
+ // Parse flags and initialize target options.
+ cl::ParseCommandLineOptions(argc, argv,
+ "llvm machine code performance analyzer.\n");
+
+ // Get the target from the triple. If a triple is not specified, then select
+ // the default triple for the host. If the triple doesn't correspond to any
+ // registered target, then exit with an error message.
+ const char *ProgName = argv[0];
+ const Target *TheTarget = getTarget(ProgName);
+ if (!TheTarget)
+ return 1;
+
+ // getTarget() may have replaced TripleName with a default triple.
+ // For safety, reconstruct the Triple object.
+ Triple TheTriple(TripleName);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
+ MemoryBuffer::getFileOrSTDIN(InputFilename);
+ if (std::error_code EC = BufferPtr.getError()) {
+ WithColor::error() << InputFilename << ": " << EC.message() << '\n';
+ return 1;
+ }
+
+ if (MCPU == "native")
+ MCPU = std::string(llvm::sys::getHostCPUName());
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (MATTRS.size()) {
+ SubtargetFeatures Features;
+ for (std::string &MAttr : MATTRS)
+ Features.AddFeature(MAttr);
+ FeaturesStr = Features.getString();
+ }
+
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+ assert(STI && "Unable to create subtarget info!");
+ if (!STI->isCPUStringValid(MCPU))
+ return 1;
+
+ if (!STI->getSchedModel().hasInstrSchedModel()) {
+ WithColor::error()
+ << "unable to find instruction-level scheduling information for"
+ << " target triple '" << TheTriple.normalize() << "' and cpu '" << MCPU
+ << "'.\n";
+
+ if (STI->getSchedModel().InstrItineraries)
+ WithColor::note()
+ << "cpu '" << MCPU << "' provides itineraries. However, "
+ << "instruction itineraries are currently unsupported.\n";
+ return 1;
+ }
+
+ // Apply overrides to llvm-mca specific options.
+ bool IsOutOfOrder = STI->getSchedModel().isOutOfOrder();
+ processViewOptions(IsOutOfOrder);
+
+ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+ assert(MRI && "Unable to create target register info!");
+
+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
+ std::unique_ptr<MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ assert(MAI && "Unable to create target asm info!");
+
+ SourceMgr SrcMgr;
+
+ // Tell SrcMgr about this buffer, which is what the parser will pick up.
+ SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
+
+ MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+ std::unique_ptr<MCObjectFileInfo> MOFI(
+ TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
+ Ctx.setObjectFileInfo(MOFI.get());
+
+ std::unique_ptr<buffer_ostream> BOS;
+
+ std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ assert(MCII && "Unable to create instruction info!");
+
+ std::unique_ptr<MCInstrAnalysis> MCIA(
+ TheTarget->createMCInstrAnalysis(MCII.get()));
+
+ // Need to initialize an MCInstPrinter as it is
+ // required for initializing the MCTargetStreamer
+ // which needs to happen within the CRG.parseAnalysisRegions() call below.
+ // Without an MCTargetStreamer, certain assembly directives can trigger a
+ // segfault. (For example, the .cv_fpo_proc directive on x86 will segfault if
+ // we don't initialize the MCTargetStreamer.)
+ unsigned IPtempOutputAsmVariant =
+ OutputAsmVariant == -1 ? 0 : OutputAsmVariant;
+ std::unique_ptr<MCInstPrinter> IPtemp(TheTarget->createMCInstPrinter(
+ Triple(TripleName), IPtempOutputAsmVariant, *MAI, *MCII, *MRI));
+ if (!IPtemp) {
+ WithColor::error()
+ << "unable to create instruction printer for target triple '"
+ << TheTriple.normalize() << "' with assembly variant "
+ << IPtempOutputAsmVariant << ".\n";
+ return 1;
+ }
+
+ // Parse the input and create CodeRegions that llvm-mca can analyze.
+ mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ *MCII);
+ Expected<const mca::AnalysisRegions &> RegionsOrErr =
+ CRG.parseAnalysisRegions(std::move(IPtemp));
+ if (!RegionsOrErr) {
+ if (auto Err =
+ handleErrors(RegionsOrErr.takeError(), [](const StringError &E) {
+ WithColor::error() << E.getMessage() << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ }
+ return 1;
+ }
+ const mca::AnalysisRegions &Regions = *RegionsOrErr;
+
+ // Early exit if errors were found by the code region parsing logic.
+ if (!Regions.isValid())
+ return 1;
+
+ if (Regions.empty()) {
+ WithColor::error() << "no assembly instructions found.\n";
+ return 1;
+ }
+
+ std::unique_ptr<mca::InstrumentManager> IM;
+ if (!DisableInstrumentManager) {
+ IM = std::unique_ptr<mca::InstrumentManager>(
+ TheTarget->createInstrumentManager(*STI, *MCII));
+ }
+ if (!IM) {
+    // If the target doesn't implement its own InstrumentManager (or
+    // DisableInstrumentManager is set), fall back to the base class (which
+    // does nothing).
+ IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
+ }
+
+ // Parse the input and create InstrumentRegion that llvm-mca
+ // can use to improve analysis.
+ mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ *MCII, *IM);
+ Expected<const mca::InstrumentRegions &> InstrumentRegionsOrErr =
+ IRG.parseInstrumentRegions(std::move(IPtemp));
+ if (!InstrumentRegionsOrErr) {
+ if (auto Err = handleErrors(InstrumentRegionsOrErr.takeError(),
+ [](const StringError &E) {
+ WithColor::error() << E.getMessage() << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ }
+ return 1;
+ }
+ const mca::InstrumentRegions &InstrumentRegions = *InstrumentRegionsOrErr;
+
+ // Early exit if errors were found by the instrumentation parsing logic.
+ if (!InstrumentRegions.isValid())
+ return 1;
+
+ // Now initialize the output file.
+ auto OF = getOutputStream();
+ if (std::error_code EC = OF.getError()) {
+ WithColor::error() << EC.message() << '\n';
+ return 1;
+ }
+
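+  // Prefer the assembler dialect detected while parsing the input, unless an
+  // explicit output variant was requested on the command line.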
+ unsigned AssemblerDialect = CRG.getAssemblerDialect();
+ if (OutputAsmVariant >= 0)
+ AssemblerDialect = static_cast<unsigned>(OutputAsmVariant);
+ std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+ Triple(TripleName), AssemblerDialect, *MAI, *MCII, *MRI));
+ if (!IP) {
+ WithColor::error()
+ << "unable to create instruction printer for target triple '"
+ << TheTriple.normalize() << "' with assembly variant "
+ << AssemblerDialect << ".\n";
+ return 1;
+ }
+
+ // Set the display preference for hex vs. decimal immediates.
+ IP->setPrintImmHex(PrintImmHex);
+
+ std::unique_ptr<ToolOutputFile> TOF = std::move(*OF);
+
+ const MCSchedModel &SM = STI->getSchedModel();
+
+ std::unique_ptr<mca::InstrPostProcess> IPP;
+ if (!DisableCustomBehaviour) {
+    // TODO: It may be a good idea to separate CB and IPP so that they can
+    // be used independently of each other, e.g. via an extra command-line
+    // flag (--disable-ipp) so that CB and IPP can be toggled without having
+    // to toggle both of them together.
+ IPP = std::unique_ptr<mca::InstrPostProcess>(
+ TheTarget->createInstrPostProcess(*STI, *MCII));
+ }
+ if (!IPP) {
+ // If the target doesn't have its own IPP implemented (or the -disable-cb
+ // flag is set) then we use the base class (which does nothing).
+ IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+ }
+
+ // Create an instruction builder.
+ mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
+
+ // Create a context to control ownership of the pipeline hardware.
+ mca::Context MCA(*MRI, *STI);
+
+ mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth,
+ RegisterFileSize, LoadQueueSize, StoreQueueSize,
+ AssumeNoAlias, EnableBottleneckAnalysis);
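+  // These fields mirror the llvm-mca command-line knobs (e.g.
+  // --micro-op-queue-size, --decoder-throughput, --dispatch,
+  // --register-file-size, --lqueue, --squeue, --noalias and
+  // --bottleneck-analysis).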
+
+ // Number each region in the sequence.
+ unsigned RegionIdx = 0;
+
+ std::unique_ptr<MCCodeEmitter> MCE(
+ TheTarget->createMCCodeEmitter(*MCII, Ctx));
+ assert(MCE && "Unable to create code emitter!");
+
+ std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
+ *STI, *MRI, mc::InitMCTargetOptionsFromFlags()));
+ assert(MAB && "Unable to create asm backend!");
+
+ json::Object JSONOutput;
+ for (const std::unique_ptr<mca::AnalysisRegion> &Region : Regions) {
+ // Skip empty code regions.
+ if (Region->empty())
+ continue;
+
+ IB.clear();
+
+ // Lower the MCInst sequence into an mca::Instruction sequence.
+ ArrayRef<MCInst> Insts = Region->getInstructions();
+ mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
+
+ IPP->resetState();
+
+ SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
+ for (const MCInst &MCI : Insts) {
+ SMLoc Loc = MCI.getLoc();
+ const SmallVector<mca::SharedInstrument> Instruments =
+ InstrumentRegions.getActiveInstruments(Loc);
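+      // Instruments active at this source location (e.g. regions introduced
+      // by special comments in the input assembly) provide extra,
+      // target-defined information to the InstrBuilder below.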
+
+ Expected<std::unique_ptr<mca::Instruction>> Inst =
+ IB.createInstruction(MCI, Instruments);
+ if (!Inst) {
+ if (auto NewE = handleErrors(
+ Inst.takeError(),
+ [&IP, &STI](const mca::InstructionError<MCInst> &IE) {
+ std::string InstructionStr;
+ raw_string_ostream SS(InstructionStr);
+ WithColor::error() << IE.Message << '\n';
+ IP->printInst(&IE.Inst, 0, "", *STI, SS);
+ SS.flush();
+ WithColor::note()
+ << "instruction: " << InstructionStr << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(NewE));
+ }
+ return 1;
+ }
+
+ IPP->postProcessInstruction(Inst.get(), MCI);
+
+ LoweredSequence.emplace_back(std::move(Inst.get()));
+ }
+
+ mca::CircularSourceMgr S(LoweredSequence,
+ PrintInstructionTables ? 1 : Iterations);
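+    // The circular source manager replays the lowered sequence for the
+    // requested number of iterations; a single pass is enough when only the
+    // instruction tables are printed.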
+
+ if (PrintInstructionTables) {
+ // Create a pipeline, stages, and a printer.
+ auto P = std::make_unique<mca::Pipeline>();
+ P->appendStage(std::make_unique<mca::EntryStage>(S));
+ P->appendStage(std::make_unique<mca::InstructionTables>(SM));
+
+ mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
+ if (PrintJson) {
+ Printer.addView(
+ std::make_unique<mca::InstructionView>(*STI, *IP, Insts));
+ }
+
+ // Create the views for this pipeline, execute, and emit a report.
+ if (PrintInstructionInfoView) {
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
+ ShowBarriers));
+ }
+ Printer.addView(
+ std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+
+ if (!runPipeline(*P))
+ return 1;
+
+ if (PrintJson) {
+ Printer.printReport(JSONOutput);
+ } else {
+ Printer.printReport(TOF->os());
+ }
+
+ ++RegionIdx;
+ continue;
+ }
+
+    // Create the CustomBehaviour object for enforcing target-specific
+    // behaviours and dependencies that aren't expressed well enough in the
+    // tablegen scheduling model. CB cannot depend on the list of MCInst or
+    // the source code (but it can depend on the list of mca::Instruction or
+    // any objects that can be reconstructed from the target information).
+ std::unique_ptr<mca::CustomBehaviour> CB;
+ if (!DisableCustomBehaviour)
+ CB = std::unique_ptr<mca::CustomBehaviour>(
+ TheTarget->createCustomBehaviour(*STI, S, *MCII));
+ if (!CB)
+ // If the target doesn't have its own CB implemented (or the -disable-cb
+ // flag is set) then we use the base class (which does nothing).
+ CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII);
+
+ // Create a basic pipeline simulating an out-of-order backend.
+ auto P = MCA.createDefaultPipeline(PO, S, *CB);
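+    // The default pipeline models fetch, dispatch, execute and retire stages
+    // (or an in-order issue stage for in-order CPUs), parameterized by PO and
+    // the target's scheduling model.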
+
+ mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
+
+ // Targets can define their own custom Views that exist within their
+ // /lib/Target/ directory so that the View can utilize their CustomBehaviour
+ // or other backend symbols / functionality that are not already exposed
+ // through one of the MC-layer classes. These Views will be initialized
+ // using the CustomBehaviour::getViews() variants.
+ // If a target makes a custom View that does not depend on their target
+ // CB or their backend, they should put the View within
+ // /tools/llvm-mca/Views/ instead.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getStartViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
+ // When we output JSON, we add a view that contains the instructions
+ // and CPU resource information.
+ if (PrintJson) {
+ auto IV = std::make_unique<mca::InstructionView>(*STI, *IP, Insts);
+ Printer.addView(std::move(IV));
+ }
+
+ if (PrintSummaryView)
+ Printer.addView(
+ std::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
+
+ if (EnableBottleneckAnalysis) {
+ if (!IsOutOfOrder) {
+ WithColor::warning()
+ << "bottleneck analysis is not supported for in-order CPU '" << MCPU
+ << "'.\n";
+ }
+ Printer.addView(std::make_unique<mca::BottleneckAnalysis>(
+ *STI, *IP, Insts, S.getNumIterations()));
+ }
+
+ if (PrintInstructionInfoView)
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
+ ShowBarriers));
+
+ // Fetch custom Views that are to be placed after the InstructionInfoView.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getPostInstrInfoViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
+ if (PrintDispatchStats)
+ Printer.addView(std::make_unique<mca::DispatchStatistics>());
+
+ if (PrintSchedulerStats)
+ Printer.addView(std::make_unique<mca::SchedulerStatistics>(*STI));
+
+ if (PrintRetireStats)
+ Printer.addView(std::make_unique<mca::RetireControlUnitStatistics>(SM));
+
+ if (PrintRegisterFileStats)
+ Printer.addView(std::make_unique<mca::RegisterFileStatistics>(*STI));
+
+ if (PrintResourcePressureView)
+ Printer.addView(
+ std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+
+ if (PrintTimelineView) {
+ unsigned TimelineIterations =
+ TimelineMaxIterations ? TimelineMaxIterations : 10;
+ Printer.addView(std::make_unique<mca::TimelineView>(
+ *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()),
+ TimelineMaxCycles));
+ }
+
+ // Fetch custom Views that are to be placed after all other Views.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getEndViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
+ if (!runPipeline(*P))
+ return 1;
+
+ if (PrintJson) {
+ Printer.printReport(JSONOutput);
+ } else {
+ Printer.printReport(TOF->os());
+ }
+
+ ++RegionIdx;
+ }
+
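+  // When emitting JSON, "{0:2}" pretty-prints the collected report with a
+  // two-space indent.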
+ if (PrintJson)
+ TOF->os() << formatv("{0:2}", json::Value(std::move(JSONOutput))) << "\n";
+
+ TOF->keep();
+ return 0;
+}
diff --git a/contrib/libs/llvm16/tools/llvm-mca/ya.make b/contrib/libs/llvm16/tools/llvm-mca/ya.make
new file mode 100644
index 0000000000..db058b8c39
--- /dev/null
+++ b/contrib/libs/llvm16/tools/llvm-mca/ya.make
@@ -0,0 +1,101 @@
+# Generated by devtools/yamaker.
+
+PROGRAM()
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+PEERDIR(
+ contrib/libs/llvm16
+ contrib/libs/llvm16/lib/Analysis
+ contrib/libs/llvm16/lib/AsmParser
+ contrib/libs/llvm16/lib/BinaryFormat
+ contrib/libs/llvm16/lib/Bitcode/Reader
+ contrib/libs/llvm16/lib/Bitcode/Writer
+ contrib/libs/llvm16/lib/Bitstream/Reader
+ contrib/libs/llvm16/lib/CodeGen
+ contrib/libs/llvm16/lib/DebugInfo/CodeView
+ contrib/libs/llvm16/lib/DebugInfo/DWARF
+ contrib/libs/llvm16/lib/DebugInfo/MSF
+ contrib/libs/llvm16/lib/DebugInfo/PDB
+ contrib/libs/llvm16/lib/DebugInfo/Symbolize
+ contrib/libs/llvm16/lib/Demangle
+ contrib/libs/llvm16/lib/IR
+ contrib/libs/llvm16/lib/IRReader
+ contrib/libs/llvm16/lib/MC
+ contrib/libs/llvm16/lib/MC/MCDisassembler
+ contrib/libs/llvm16/lib/MC/MCParser
+ contrib/libs/llvm16/lib/MCA
+ contrib/libs/llvm16/lib/Object
+ contrib/libs/llvm16/lib/ProfileData
+ contrib/libs/llvm16/lib/Remarks
+ contrib/libs/llvm16/lib/Support
+ contrib/libs/llvm16/lib/Target
+ contrib/libs/llvm16/lib/Target/AArch64/AsmParser
+ contrib/libs/llvm16/lib/Target/AArch64/Disassembler
+ contrib/libs/llvm16/lib/Target/AArch64/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm16/lib/Target/AArch64/Utils
+ contrib/libs/llvm16/lib/Target/ARM/AsmParser
+ contrib/libs/llvm16/lib/Target/ARM/Disassembler
+ contrib/libs/llvm16/lib/Target/ARM/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm16/lib/Target/ARM/Utils
+ contrib/libs/llvm16/lib/Target/BPF/AsmParser
+ contrib/libs/llvm16/lib/Target/BPF/Disassembler
+ contrib/libs/llvm16/lib/Target/BPF/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/BPF/TargetInfo
+ contrib/libs/llvm16/lib/Target/LoongArch/AsmParser
+ contrib/libs/llvm16/lib/Target/LoongArch/Disassembler
+ contrib/libs/llvm16/lib/Target/LoongArch/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/LoongArch/TargetInfo
+ contrib/libs/llvm16/lib/Target/NVPTX/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/NVPTX/TargetInfo
+ contrib/libs/llvm16/lib/Target/PowerPC/AsmParser
+ contrib/libs/llvm16/lib/Target/PowerPC/Disassembler
+ contrib/libs/llvm16/lib/Target/PowerPC/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/PowerPC/TargetInfo
+ contrib/libs/llvm16/lib/Target/WebAssembly/AsmParser
+ contrib/libs/llvm16/lib/Target/WebAssembly/Disassembler
+ contrib/libs/llvm16/lib/Target/WebAssembly/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/WebAssembly/TargetInfo
+ contrib/libs/llvm16/lib/Target/WebAssembly/Utils
+ contrib/libs/llvm16/lib/Target/X86/AsmParser
+ contrib/libs/llvm16/lib/Target/X86/Disassembler
+ contrib/libs/llvm16/lib/Target/X86/MCA
+ contrib/libs/llvm16/lib/Target/X86/MCTargetDesc
+ contrib/libs/llvm16/lib/Target/X86/TargetInfo
+ contrib/libs/llvm16/lib/TargetParser
+ contrib/libs/llvm16/lib/TextAPI
+ contrib/libs/llvm16/lib/Transforms/ObjCARC
+ contrib/libs/llvm16/lib/Transforms/Scalar
+ contrib/libs/llvm16/lib/Transforms/Utils
+)
+
+ADDINCL(
+ contrib/libs/llvm16/tools/llvm-mca
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+ CodeRegion.cpp
+ CodeRegionGenerator.cpp
+ PipelinePrinter.cpp
+ Views/BottleneckAnalysis.cpp
+ Views/DispatchStatistics.cpp
+ Views/InstructionInfoView.cpp
+ Views/InstructionView.cpp
+ Views/RegisterFileStatistics.cpp
+ Views/ResourcePressureView.cpp
+ Views/RetireControlUnitStatistics.cpp
+ Views/SchedulerStatistics.cpp
+ Views/SummaryView.cpp
+ Views/TimelineView.cpp
+ llvm-mca.cpp
+)
+
+END()