| author | vvvv <vvvv@ydb.tech> | 2024-02-06 20:01:22 +0300 |
|---|---|---|
| committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:18:27 +0300 |
| commit | ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193 (patch) | |
| tree | 102765416c3866bde98a82facc7752d329ee0226 /contrib/libs/llvm16/tools/llvm-mca | |
| parent | 7494ca32d3a5aca00b7ac527b5f127989335102c (diff) | |
| download | ydb-ee2b7fbda052aa09b6fdb83b8c6f0305fef3e193.tar.gz | |
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/tools/llvm-mca')
28 files changed, 5099 insertions, 0 deletions
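The first files in the diff below (CodeRegion.h and CodeRegion.cpp) define the region bookkeeping that the rest of the imported llvm-mca sources build on: an AnalysisRegions object collects the MCInsts that fall between LLVM-MCA-BEGIN / LLVM-MCA-END assembly comments. As orientation, here is a minimal, hypothetical driver for that API — it is not part of this commit, and the buffer text, region name, and pointer arithmetic are illustrative only:

```cpp
// Hypothetical standalone driver (not part of this commit) exercising the
// AnalysisRegions API declared in CodeRegion.h below.
#include "CodeRegion.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // The assembly text normally comes from the llvm-mca input file; SMLoc
  // values are raw pointers into this buffer.
  static const char Asm[] = "vmulps %xmm0, %xmm0, %xmm2\n";
  SourceMgr SM;
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(Asm), SMLoc());

  mca::AnalysisRegions Regions(SM);

  // Equivalent of a "# LLVM-MCA-BEGIN foo" comment at the start of the buffer.
  Regions.beginRegion("foo", SMLoc::getFromPointer(Asm));

  // The MCStreamer wrapper in CodeRegionGenerator.cpp forwards every parsed
  // MCInst here; the instruction is attached to each region whose source
  // range covers its location.
  MCInst Inst;
  Inst.setLoc(SMLoc::getFromPointer(Asm));
  Regions.addInstruction(Inst);

  // Equivalent of an unnamed "# LLVM-MCA-END" closing the only active region.
  Regions.endRegion("", SMLoc::getFromPointer(Asm + sizeof(Asm) - 1));

  for (const auto &Region : Regions)
    errs() << "region '" << Region->getDescription() << "': "
           << Region->getInstructions().size() << " instruction(s)\n";
  return Regions.isValid() ? 0 : 1;
}
```

In the actual tool this sequence is driven by AsmAnalysisRegionGenerator and the comment consumers defined in CodeRegionGenerator.cpp further down in this diff, rather than called by hand.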
diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp new file mode 100644 index 0000000000..c91ed759ee --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.cpp @@ -0,0 +1,174 @@ +//===-------------------------- CodeRegion.cpp -----------------*- C++ -* -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements methods from the CodeRegions interface. +/// +//===----------------------------------------------------------------------===// + +#include "CodeRegion.h" + +namespace llvm { +namespace mca { + +bool CodeRegion::isLocInRange(SMLoc Loc) const { + if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer()) + return false; + if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer()) + return false; + return true; +} + +void CodeRegions::addInstruction(const MCInst &Instruction) { + SMLoc Loc = Instruction.getLoc(); + for (UniqueCodeRegion &Region : Regions) + if (Region->isLocInRange(Loc)) + Region->addInstruction(Instruction); +} + +AnalysisRegions::AnalysisRegions(llvm::SourceMgr &S) : CodeRegions(S) { + // Create a default region for the input code sequence. + Regions.emplace_back(std::make_unique<CodeRegion>("", SMLoc())); +} + +void AnalysisRegions::beginRegion(StringRef Description, SMLoc Loc) { + if (ActiveRegions.empty()) { + // Remove the default region if there is at least one user defined region. + // By construction, only the default region has an invalid start location. + if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() && + !Regions[0]->endLoc().isValid()) { + ActiveRegions[Description] = 0; + Regions[0] = std::make_unique<CodeRegion>(Description, Loc); + return; + } + } else { + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + const CodeRegion &R = *Regions[It->second]; + if (Description.empty()) { + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "found multiple overlapping anonymous regions"); + SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note, + "Previous anonymous region was defined here"); + FoundErrors = true; + return; + } + + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "overlapping regions cannot have the same name"); + SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note, + "region " + Description + " was previously defined here"); + FoundErrors = true; + return; + } + } + + ActiveRegions[Description] = Regions.size(); + Regions.emplace_back(std::make_unique<CodeRegion>(Description, Loc)); +} + +void AnalysisRegions::endRegion(StringRef Description, SMLoc Loc) { + if (Description.empty()) { + // Special case where there is only one user defined region, + // and this LLVM-MCA-END directive doesn't provide a region name. + // In this case, we assume that the user simply wanted to just terminate + // the only active region. + if (ActiveRegions.size() == 1) { + auto It = ActiveRegions.begin(); + Regions[It->second]->setEndLocation(Loc); + ActiveRegions.erase(It); + return; + } + + // Special case where the region end marker applies to the default region. 
+ if (ActiveRegions.empty() && Regions.size() == 1 && + !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) { + Regions[0]->setEndLocation(Loc); + return; + } + } + + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + Regions[It->second]->setEndLocation(Loc); + ActiveRegions.erase(It); + return; + } + + FoundErrors = true; + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "found an invalid region end directive"); + if (!Description.empty()) { + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note, + "unable to find an active region named " + Description); + } else { + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note, + "unable to find an active anonymous region"); + } +} + +InstrumentRegions::InstrumentRegions(llvm::SourceMgr &S) : CodeRegions(S) {} + +void InstrumentRegions::beginRegion(StringRef Description, SMLoc Loc, + SharedInstrument I) { + if (Description.empty()) { + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "anonymous instrumentation regions are not permitted"); + FoundErrors = true; + return; + } + + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + const CodeRegion &R = *Regions[It->second]; + SM.PrintMessage( + Loc, llvm::SourceMgr::DK_Error, + "overlapping instrumentation regions cannot be of the same kind"); + SM.PrintMessage(R.startLoc(), llvm::SourceMgr::DK_Note, + "instrumentation region " + Description + + " was previously defined here"); + FoundErrors = true; + return; + } + + ActiveRegions[Description] = Regions.size(); + Regions.emplace_back(std::make_unique<InstrumentRegion>(Description, Loc, I)); +} + +void InstrumentRegions::endRegion(StringRef Description, SMLoc Loc) { + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + Regions[It->second]->setEndLocation(Loc); + ActiveRegions.erase(It); + return; + } + + FoundErrors = true; + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "found an invalid instrumentation region end directive"); + if (!Description.empty()) { + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Note, + "unable to find an active instrumentation region named " + + Description); + } +} + +const SmallVector<SharedInstrument> +InstrumentRegions::getActiveInstruments(SMLoc Loc) const { + SmallVector<SharedInstrument> AI; + for (auto &R : Regions) { + if (R->isLocInRange(Loc)) { + InstrumentRegion *IR = static_cast<InstrumentRegion *>(R.get()); + AI.emplace_back(IR->getInstrument()); + } + } + return AI; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h new file mode 100644 index 0000000000..b5b2f3a0d1 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegion.h @@ -0,0 +1,195 @@ +//===-------------------------- CodeRegion.h -------------------*- C++ -* -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements class CodeRegion and CodeRegions, InstrumentRegion, +/// AnalysisRegions, and InstrumentRegions. +/// +/// A CodeRegion describes a region of assembly code guarded by special LLVM-MCA +/// comment directives. +/// +/// # LLVM-MCA-BEGIN foo +/// ... 
## asm +/// # LLVM-MCA-END +/// +/// A comment starting with substring LLVM-MCA-BEGIN marks the beginning of a +/// new region of code. +/// A comment starting with substring LLVM-MCA-END marks the end of the +/// last-seen region of code. +/// +/// Code regions are not allowed to overlap. Each region can have a optional +/// description; internally, regions are described by a range of source +/// locations (SMLoc objects). +/// +/// An instruction (a MCInst) is added to a CodeRegion R only if its +/// location is in range [R.RangeStart, R.RangeEnd]. +/// +/// A InstrumentRegion describes a region of assembly code guarded by +/// special LLVM-MCA comment directives. +/// +/// # LLVM-MCA-<INSTRUMENTATION_TYPE> <data> +/// ... ## asm +/// +/// where INSTRUMENTATION_TYPE is a type defined in llvm and expects to use +/// data. +/// +/// A comment starting with substring LLVM-MCA-<INSTRUMENTATION_TYPE> +/// brings data into scope for llvm-mca to use in its analysis for +/// all following instructions. +/// +/// If the same INSTRUMENTATION_TYPE is found later in the instruction list, +/// then the original InstrumentRegion will be automatically ended, +/// and a new InstrumentRegion will begin. +/// +/// If there are comments containing the different INSTRUMENTATION_TYPEs, +/// then both data sets remain available. In contrast with a CodeRegion, +/// an InstrumentRegion does not need a comment to end the region. +// +// An instruction (a MCInst) is added to an InstrumentRegion R only +// if its location is in range [R.RangeStart, R.RangeEnd]. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_H +#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MCA/CustomBehaviour.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include <vector> + +namespace llvm { +namespace mca { + +/// A region of assembly code. +/// +/// It identifies a sequence of machine instructions. +class CodeRegion { + // An optional descriptor for this region. + llvm::StringRef Description; + // Instructions that form this region. + llvm::SmallVector<llvm::MCInst, 16> Instructions; + // Source location range. + llvm::SMLoc RangeStart; + llvm::SMLoc RangeEnd; + + CodeRegion(const CodeRegion &) = delete; + CodeRegion &operator=(const CodeRegion &) = delete; + +public: + CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start) + : Description(Desc), RangeStart(Start) {} + + void addInstruction(const llvm::MCInst &Instruction) { + Instructions.emplace_back(Instruction); + } + + llvm::SMLoc startLoc() const { return RangeStart; } + llvm::SMLoc endLoc() const { return RangeEnd; } + + void setEndLocation(llvm::SMLoc End) { RangeEnd = End; } + bool empty() const { return Instructions.empty(); } + bool isLocInRange(llvm::SMLoc Loc) const; + + llvm::ArrayRef<llvm::MCInst> getInstructions() const { return Instructions; } + + llvm::StringRef getDescription() const { return Description; } +}; + +/// Alias AnalysisRegion with CodeRegion since CodeRegionGenerator +/// is absract and AnalysisRegionGenerator operates on AnalysisRegions +using AnalysisRegion = CodeRegion; + +/// A CodeRegion that contains instrumentation that can be used +/// in analysis of the region. +class InstrumentRegion : public CodeRegion { + /// Instrument for this region. 
+ SharedInstrument Instrument; + +public: + InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, SharedInstrument I) + : CodeRegion(Desc, Start), Instrument(I) {} + +public: + SharedInstrument getInstrument() const { return Instrument; } +}; + +class CodeRegionParseError final : public Error {}; + +class CodeRegions { + CodeRegions(const CodeRegions &) = delete; + CodeRegions &operator=(const CodeRegions &) = delete; + +protected: + // A source manager. Used by the tool to generate meaningful warnings. + llvm::SourceMgr &SM; + + using UniqueCodeRegion = std::unique_ptr<CodeRegion>; + std::vector<UniqueCodeRegion> Regions; + llvm::StringMap<unsigned> ActiveRegions; + bool FoundErrors; + +public: + CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {} + + typedef std::vector<UniqueCodeRegion>::iterator iterator; + typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator; + + iterator begin() { return Regions.begin(); } + iterator end() { return Regions.end(); } + const_iterator begin() const { return Regions.cbegin(); } + const_iterator end() const { return Regions.cend(); } + + void addInstruction(const llvm::MCInst &Instruction); + llvm::SourceMgr &getSourceMgr() const { return SM; } + + llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const { + return Regions[Idx]->getInstructions(); + } + + bool empty() const { + return llvm::all_of(Regions, [](const UniqueCodeRegion &Region) { + return Region->empty(); + }); + } + + bool isValid() const { return !FoundErrors; } + + bool isRegionActive(llvm::StringRef Description) const { + return ActiveRegions.find(Description) != ActiveRegions.end(); + } +}; + +struct AnalysisRegions : public CodeRegions { + AnalysisRegions(llvm::SourceMgr &S); + + void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc); + void endRegion(llvm::StringRef Description, llvm::SMLoc Loc); +}; + +struct InstrumentRegions : public CodeRegions { + InstrumentRegions(llvm::SourceMgr &S); + + void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc, + SharedInstrument Instrument); + void endRegion(llvm::StringRef Description, llvm::SMLoc Loc); + + const SmallVector<SharedInstrument> + getActiveInstruments(llvm::SMLoc Loc) const; +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp new file mode 100644 index 0000000000..b8e10fa69c --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.cpp @@ -0,0 +1,209 @@ +//===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines classes responsible for generating llvm-mca +/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions, +/// so the classes here provide the input-to-CodeRegions translation. 
+// +//===----------------------------------------------------------------------===// + +#include "CodeRegionGenerator.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SMLoc.h" +#include <memory> + +namespace llvm { +namespace mca { + +// This virtual dtor serves as the anchor for the CodeRegionGenerator class. +CodeRegionGenerator::~CodeRegionGenerator() {} + +// This class provides the callbacks that occur when parsing input assembly. +class MCStreamerWrapper final : public MCStreamer { + CodeRegions &Regions; + +public: + MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R) + : MCStreamer(Context), Regions(R) {} + + // We only want to intercept the emission of new instructions. + void emitInstruction(const MCInst &Inst, + const MCSubtargetInfo & /* unused */) override { + Regions.addInstruction(Inst); + } + + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { + return true; + } + + void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + Align ByteAlignment) override {} + void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr, + uint64_t Size = 0, Align ByteAlignment = Align(1), + SMLoc Loc = SMLoc()) override {} + void emitGPRel32Value(const MCExpr *Value) override {} + void beginCOFFSymbolDef(const MCSymbol *Symbol) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} + void endCOFFSymbolDef() override {} + + ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const { + return Regions.getInstructionSequence(Index); + } +}; + +Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions( + const std::unique_ptr<MCInstPrinter> &IP) { + MCTargetOptions Opts; + Opts.PreserveAsmComments = false; + CodeRegions &Regions = getRegions(); + MCStreamerWrapper Str(Ctx, Regions); + + // Need to initialize an MCTargetStreamer otherwise + // certain asm directives will cause a segfault. + // Using nulls() so that anything emitted by the MCTargetStreamer + // doesn't show up in the llvm-mca output. + raw_ostream &OSRef = nulls(); + formatted_raw_ostream FOSRef(OSRef); + TheTarget.createAsmTargetStreamer(Str, FOSRef, IP.get(), + /*IsVerboseAsm=*/true); + + // Create a MCAsmParser and setup the lexer to recognize llvm-mca ASM + // comments. + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(Regions.getSourceMgr(), Ctx, Str, MAI)); + MCAsmLexer &Lexer = Parser->getLexer(); + MCACommentConsumer *CCP = getCommentConsumer(); + Lexer.setCommentConsumer(CCP); + // Enable support for MASM literal numbers (example: 05h, 101b). + Lexer.setLexMasmIntegers(true); + + std::unique_ptr<MCTargetAsmParser> TAP( + TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts)); + if (!TAP) + return make_error<StringError>( + "This target does not support assembly parsing.", + inconvertibleErrorCode()); + Parser->setTargetParser(*TAP); + Parser->Run(false); + + if (CCP->hadErr()) + return make_error<StringError>("There was an error parsing comments.", + inconvertibleErrorCode()); + + // Set the assembler dialect from the input. llvm-mca will use this as the + // default dialect when printing reports. + AssemblerDialect = Parser->getAssemblerDialect(); + return Regions; +} + +void AnalysisRegionCommentConsumer::HandleComment(SMLoc Loc, + StringRef CommentText) { + // Skip empty comments. 
+ StringRef Comment(CommentText); + if (Comment.empty()) + return; + + // Skip spaces and tabs. + unsigned Position = Comment.find_first_not_of(" \t"); + if (Position >= Comment.size()) + // We reached the end of the comment. Bail out. + return; + + Comment = Comment.drop_front(Position); + if (Comment.consume_front("LLVM-MCA-END")) { + // Skip spaces and tabs. + Position = Comment.find_first_not_of(" \t"); + if (Position < Comment.size()) + Comment = Comment.drop_front(Position); + Regions.endRegion(Comment, Loc); + return; + } + + // Try to parse the LLVM-MCA-BEGIN comment. + if (!Comment.consume_front("LLVM-MCA-BEGIN")) + return; + + // Skip spaces and tabs. + Position = Comment.find_first_not_of(" \t"); + if (Position < Comment.size()) + Comment = Comment.drop_front(Position); + // Use the rest of the string as a descriptor for this code snippet. + Regions.beginRegion(Comment, Loc); +} + +void InstrumentRegionCommentConsumer::HandleComment(SMLoc Loc, + StringRef CommentText) { + // Skip empty comments. + StringRef Comment(CommentText); + if (Comment.empty()) + return; + + // Skip spaces and tabs. + unsigned Position = Comment.find_first_not_of(" \t"); + if (Position >= Comment.size()) + // We reached the end of the comment. Bail out. + return; + Comment = Comment.drop_front(Position); + + // Bail out if not an MCA style comment + if (!Comment.consume_front("LLVM-MCA-")) + return; + + // Skip AnalysisRegion comments + if (Comment.consume_front("BEGIN") || Comment.consume_front("END")) + return; + + if (IM.shouldIgnoreInstruments()) + return; + + auto [InstrumentKind, Data] = Comment.split(" "); + + // An error if not of the form LLVM-MCA-TARGET-KIND + if (!IM.supportsInstrumentType(InstrumentKind)) { + if (InstrumentKind.empty()) + SM.PrintMessage( + Loc, llvm::SourceMgr::DK_Error, + "No instrumentation kind was provided in LLVM-MCA comment"); + else + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "Unknown instrumentation type in LLVM-MCA comment: " + + InstrumentKind); + FoundError = true; + return; + } + + SharedInstrument I = IM.createInstrument(InstrumentKind, Data); + if (!I) { + if (Data.empty()) + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "Failed to create " + InstrumentKind + + " instrument with no data"); + else + SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, + "Failed to create " + InstrumentKind + + " instrument with data: " + Data); + FoundError = true; + return; + } + + // End InstrumentType region if one is open + if (Regions.isRegionActive(InstrumentKind)) + Regions.endRegion(InstrumentKind, Loc); + // Start new instrumentation region + Regions.beginRegion(InstrumentKind, Loc, I); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h new file mode 100644 index 0000000000..88621ed856 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/CodeRegionGenerator.h @@ -0,0 +1,205 @@ +//===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file declares classes responsible for generating llvm-mca +/// CodeRegions from various types of input. 
llvm-mca only analyzes CodeRegions, +/// so the classes here provide the input-to-CodeRegions translation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H +#define LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H + +#include "CodeRegion.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/MCA/CustomBehaviour.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" +#include <memory> + +namespace llvm { +namespace mca { + +class MCACommentConsumer : public AsmCommentConsumer { +protected: + bool FoundError; + +public: + MCACommentConsumer() : FoundError(false) {} + + bool hadErr() const { return FoundError; } +}; + +/// A comment consumer that parses strings. The only valid tokens are strings. +class AnalysisRegionCommentConsumer : public MCACommentConsumer { + AnalysisRegions &Regions; + +public: + AnalysisRegionCommentConsumer(AnalysisRegions &R) : Regions(R) {} + + /// Parses a comment. It begins a new region if it is of the form + /// LLVM-MCA-BEGIN. It ends a region if it is of the form LLVM-MCA-END. + /// Regions can be optionally named if they are of the form + /// LLVM-MCA-BEGIN <name> or LLVM-MCA-END <name>. Subregions are + /// permitted, but a region that begins while another region is active + /// must be ended before the outer region is ended. If thre is only one + /// active region, LLVM-MCA-END does not need to provide a name. + void HandleComment(SMLoc Loc, StringRef CommentText) override; +}; + +/// A comment consumer that parses strings to create InstrumentRegions. +/// The only valid tokens are strings. +class InstrumentRegionCommentConsumer : public MCACommentConsumer { + llvm::SourceMgr &SM; + + InstrumentRegions &Regions; + + InstrumentManager &IM; + +public: + InstrumentRegionCommentConsumer(llvm::SourceMgr &SM, InstrumentRegions &R, + InstrumentManager &IM) + : SM(SM), Regions(R), IM(IM) {} + + /// Parses a comment. It begins a new region if it is of the form + /// LLVM-MCA-<INSTRUMENTATION_TYPE> <data> where INSTRUMENTATION_TYPE + /// is a valid InstrumentKind. If there is already an active + /// region of type INSTRUMENATION_TYPE, then it will end the active + /// one and begin a new one using the new data. + void HandleComment(SMLoc Loc, StringRef CommentText) override; +}; + +/// This abstract class is responsible for parsing the input given to +/// the llvm-mca driver, and converting that into a CodeRegions instance. 
+class CodeRegionGenerator { +protected: + CodeRegionGenerator(const CodeRegionGenerator &) = delete; + CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete; + virtual Expected<const CodeRegions &> + parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0; + +public: + CodeRegionGenerator() {} + virtual ~CodeRegionGenerator(); +}; + +/// Abastract CodeRegionGenerator with AnalysisRegions member +class AnalysisRegionGenerator : public virtual CodeRegionGenerator { +protected: + AnalysisRegions Regions; + +public: + AnalysisRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {} + + virtual Expected<const AnalysisRegions &> + parseAnalysisRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0; +}; + +/// Abstract CodeRegionGenerator with InstrumentRegionsRegions member +class InstrumentRegionGenerator : public virtual CodeRegionGenerator { +protected: + InstrumentRegions Regions; + +public: + InstrumentRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {} + + virtual Expected<const InstrumentRegions &> + parseInstrumentRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0; +}; + +/// This abstract class is responsible for parsing input ASM and +/// generating a CodeRegions instance. +class AsmCodeRegionGenerator : public virtual CodeRegionGenerator { + const Target &TheTarget; + MCContext &Ctx; + const MCAsmInfo &MAI; + const MCSubtargetInfo &STI; + const MCInstrInfo &MCII; + unsigned AssemblerDialect; // This is set during parsing. + +public: + AsmCodeRegionGenerator(const Target &T, MCContext &C, const MCAsmInfo &A, + const MCSubtargetInfo &S, const MCInstrInfo &I) + : TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I), AssemblerDialect(0) {} + + virtual MCACommentConsumer *getCommentConsumer() = 0; + virtual CodeRegions &getRegions() = 0; + + unsigned getAssemblerDialect() const { return AssemblerDialect; } + Expected<const CodeRegions &> + parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override; +}; + +class AsmAnalysisRegionGenerator final : public AnalysisRegionGenerator, + public AsmCodeRegionGenerator { + AnalysisRegionCommentConsumer CC; + +public: + AsmAnalysisRegionGenerator(const Target &T, llvm::SourceMgr &SM, MCContext &C, + const MCAsmInfo &A, const MCSubtargetInfo &S, + const MCInstrInfo &I) + : AnalysisRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I), + CC(Regions) {} + + MCACommentConsumer *getCommentConsumer() override { return &CC; }; + CodeRegions &getRegions() override { return Regions; }; + + Expected<const AnalysisRegions &> + parseAnalysisRegions(const std::unique_ptr<MCInstPrinter> &IP) override { + Expected<const CodeRegions &> RegionsOrErr = parseCodeRegions(IP); + if (!RegionsOrErr) + return RegionsOrErr.takeError(); + else + return static_cast<const AnalysisRegions &>(*RegionsOrErr); + } + + Expected<const CodeRegions &> + parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override { + return AsmCodeRegionGenerator::parseCodeRegions(IP); + } +}; + +class AsmInstrumentRegionGenerator final : public InstrumentRegionGenerator, + public AsmCodeRegionGenerator { + InstrumentRegionCommentConsumer CC; + +public: + AsmInstrumentRegionGenerator(const Target &T, llvm::SourceMgr &SM, + MCContext &C, const MCAsmInfo &A, + const MCSubtargetInfo &S, const MCInstrInfo &I, + InstrumentManager &IM) + : InstrumentRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I), + CC(SM, Regions, IM) {} + + MCACommentConsumer *getCommentConsumer() override { return &CC; }; + CodeRegions &getRegions() override { return Regions; }; + + 
Expected<const InstrumentRegions &> + parseInstrumentRegions(const std::unique_ptr<MCInstPrinter> &IP) override { + Expected<const CodeRegions &> RegionsOrErr = parseCodeRegions(IP); + if (!RegionsOrErr) + return RegionsOrErr.takeError(); + else + return static_cast<const InstrumentRegions &>(*RegionsOrErr); + } + + Expected<const CodeRegions &> + parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) override { + return AsmCodeRegionGenerator::parseCodeRegions(IP); + } +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H diff --git a/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp new file mode 100644 index 0000000000..9d06c6a193 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.cpp @@ -0,0 +1,129 @@ +//===--------------------- PipelinePrinter.cpp ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the PipelinePrinter interface. +/// +//===----------------------------------------------------------------------===// + +#include "PipelinePrinter.h" +#include "CodeRegion.h" +#include "Views/InstructionView.h" + +namespace llvm { +namespace mca { + +void PipelinePrinter::printRegionHeader(llvm::raw_ostream &OS) const { + StringRef RegionName; + if (!Region.getDescription().empty()) + RegionName = Region.getDescription(); + + OS << "\n[" << RegionIdx << "] Code Region"; + if (!RegionName.empty()) + OS << " - " << RegionName; + OS << "\n\n"; +} + +json::Object PipelinePrinter::getJSONReportRegion() const { + json::Object JO; + + StringRef RegionName = ""; + if (!Region.getDescription().empty()) + RegionName = Region.getDescription(); + + JO.try_emplace("Name", RegionName); + for (const auto &V : Views) + if (V->isSerializable()) + JO.try_emplace(V->getNameAsString().str(), V->toJSON()); + + return JO; +} + +json::Object PipelinePrinter::getJSONSimulationParameters() const { + json::Object SimParameters({{"-mcpu", STI.getCPU()}, + {"-mtriple", STI.getTargetTriple().getTriple()}, + {"-march", STI.getTargetTriple().getArchName()}}); + + const MCSchedModel &SM = STI.getSchedModel(); + if (!SM.isOutOfOrder()) + return SimParameters; + + if (PO.RegisterFileSize) + SimParameters.try_emplace("-register-file-size", PO.RegisterFileSize); + + if (!PO.AssumeNoAlias) + SimParameters.try_emplace("-noalias", PO.AssumeNoAlias); + + if (PO.DecodersThroughput) + SimParameters.try_emplace("-decoder-throughput", PO.DecodersThroughput); + + if (PO.MicroOpQueueSize) + SimParameters.try_emplace("-micro-op-queue-size", PO.MicroOpQueueSize); + + if (PO.DispatchWidth) + SimParameters.try_emplace("-dispatch", PO.DispatchWidth); + + if (PO.LoadQueueSize) + SimParameters.try_emplace("-lqueue", PO.LoadQueueSize); + + if (PO.StoreQueueSize) + SimParameters.try_emplace("-squeue", PO.StoreQueueSize); + + return SimParameters; +} + +json::Object PipelinePrinter::getJSONTargetInfo() const { + json::Array Resources; + const MCSchedModel &SM = STI.getSchedModel(); + StringRef MCPU = STI.getCPU(); + + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + if 
(ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + std::string ResourceName = ProcResource.Name; + if (NumUnits > 1) { + ResourceName += "."; + ResourceName += J; + } + + Resources.push_back(ResourceName); + } + } + + return json::Object({{"CPUName", MCPU}, {"Resources", std::move(Resources)}}); +} + +void PipelinePrinter::printReport(json::Object &JO) const { + if (!RegionIdx) { + JO.try_emplace("TargetInfo", getJSONTargetInfo()); + JO.try_emplace("SimulationParameters", getJSONSimulationParameters()); + // Construct an array of regions. + JO.try_emplace("CodeRegions", json::Array()); + } + + json::Array *Regions = JO.getArray("CodeRegions"); + assert(Regions && "This array must exist!"); + Regions->push_back(getJSONReportRegion()); +} + +void PipelinePrinter::printReport(llvm::raw_ostream &OS) const { + // Don't print the header of this region if it is the default region, and if + // it doesn't have an end location. + if (Region.startLoc().isValid() || Region.endLoc().isValid()) + printRegionHeader(OS); + + for (const auto &V : Views) + V->printView(OS); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h new file mode 100644 index 0000000000..d89e913f97 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/PipelinePrinter.h @@ -0,0 +1,69 @@ +//===--------------------- PipelinePrinter.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements class PipelinePrinter. +/// +/// PipelinePrinter allows the customization of the performance report. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H +#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/Context.h" +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/View.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +class CodeRegion; + +/// A printer class that knows how to collects statistics on the +/// code analyzed by the llvm-mca tool. +/// +/// This class knows how to print out the analysis information collected +/// during the execution of the code. Internally, it delegates to other +/// classes the task of printing out timeline information as well as +/// resource pressure. 
+class PipelinePrinter { + Pipeline &P; + const CodeRegion &Region; + unsigned RegionIdx; + const MCSubtargetInfo &STI; + const PipelineOptions &PO; + llvm::SmallVector<std::unique_ptr<View>, 8> Views; + + void printRegionHeader(llvm::raw_ostream &OS) const; + json::Object getJSONReportRegion() const; + json::Object getJSONTargetInfo() const; + json::Object getJSONSimulationParameters() const; + +public: + PipelinePrinter(Pipeline &Pipe, const CodeRegion &R, unsigned Idx, + const MCSubtargetInfo &STI, const PipelineOptions &PO) + : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO) {} + + void addView(std::unique_ptr<View> V) { + P.addEventListener(V.get()); + Views.emplace_back(std::move(V)); + } + + void printReport(llvm::raw_ostream &OS) const; + void printReport(json::Object &JO) const; +}; +} // namespace mca +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp new file mode 100644 index 0000000000..dc0a07e75e --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -0,0 +1,644 @@ +//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the BottleneckAnalysis +/// to report bottleneck info. +/// +//===----------------------------------------------------------------------===// + +#include "Views/BottleneckAnalysis.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +PressureTracker::PressureTracker(const MCSchedModel &Model) + : SM(Model), + ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), + ProcResID2Mask(Model.getNumProcResourceKinds(), 0), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), + ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResID2Mask); + + // Ignore the invalid resource at index zero. 
+ unsigned NextResourceUsersIdx = 0; + for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; + NextResourceUsersIdx += ProcResource.NumUnits; + uint64_t ResourceMask = ProcResID2Mask[I]; + ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; + } + + ResourceUsers.resize(NextResourceUsersIdx); + std::fill(ResourceUsers.begin(), ResourceUsers.end(), + std::make_pair<unsigned, unsigned>(~0U, 0U)); +} + +void PressureTracker::getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { + const User U = getResourceUser(ProcResID, I); + if (U.second && IPI.find(U.first) != IPI.end()) + Users.emplace_back(U); + } +} + +void PressureTracker::onInstructionDispatched(unsigned IID) { + IPI.insert(std::make_pair(IID, InstructionPressureInfo())); +} + +void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } + +void PressureTracker::handleInstructionIssuedEvent( + const HWInstructionIssuedEvent &Event) { + unsigned IID = Event.IR.getSourceIndex(); + for (const ResourceUse &Use : Event.UsedResources) { + const ResourceRef &RR = Use.first; + unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; + Index += countTrailingZeros(RR.second); + ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); + } +} + +void PressureTracker::updateResourcePressureDistribution( + uint64_t CumulativeMask) { + while (CumulativeMask) { + uint64_t Current = CumulativeMask & (-CumulativeMask); + unsigned ResIdx = getResourceStateIndex(Current); + unsigned ProcResID = ResIdx2ProcResID[ResIdx]; + uint64_t Mask = ProcResID2Mask[ProcResID]; + + if (Mask == Current) { + ResourcePressureDistribution[ProcResID]++; + CumulativeMask ^= Current; + continue; + } + + Mask ^= Current; + while (Mask) { + uint64_t SubUnit = Mask & (-Mask); + ResIdx = getResourceStateIndex(SubUnit); + ProcResID = ResIdx2ProcResID[ResIdx]; + ResourcePressureDistribution[ProcResID]++; + Mask ^= SubUnit; + } + + CumulativeMask ^= Current; + } +} + +void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: { + const uint64_t ResourceMask = Event.ResourceMask; + updateResourcePressureDistribution(Event.ResourceMask); + + for (const InstRef &IR : Event.AffectedInstructions) { + const Instruction &IS = *IR.getInstruction(); + unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; + if (!BusyResources) + continue; + + unsigned IID = IR.getSourceIndex(); + IPI[IID].ResourcePressureCycles++; + } + break; + } + + case HWPressureEvent::REGISTER_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].RegisterPressureCycles++; + } + break; + + case HWPressureEvent::MEMORY_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].MemoryPressureCycles++; + } + } +} + +#ifndef NDEBUG +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, + const DependencyEdge &DepEdge, + MCInstPrinter &MCIP) const { + unsigned FromIID = DepEdge.FromIID; 
+ unsigned ToIID = DepEdge.ToIID; + assert(FromIID < ToIID && "Graph should be acyclic!"); + + const DependencyEdge::Dependency &DE = DepEdge.Dep; + assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); + + OS << " FROM: " << FromIID << " TO: " << ToIID << " "; + if (DE.Type == DependencyEdge::DT_REGISTER) { + OS << " - REGISTER: "; + MCIP.printRegName(OS, DE.ResourceOrRegID); + } else if (DE.Type == DependencyEdge::DT_MEMORY) { + OS << " - MEMORY"; + } else { + assert(DE.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; + } + OS << " - COST: " << DE.Cost << '\n'; +} +#endif // NDEBUG + +void DependencyGraph::pruneEdges(unsigned Iterations) { + for (DGNode &N : Nodes) { + unsigned NumPruned = 0; + const unsigned Size = N.OutgoingEdges.size(); + // Use a cut-off threshold to prune edges with a low frequency. + for (unsigned I = 0, E = Size; I < E; ++I) { + DependencyEdge &Edge = N.OutgoingEdges[I]; + if (Edge.Frequency == Iterations) + continue; + double Factor = (double)Edge.Frequency / Iterations; + if (0.10 < Factor) + continue; + Nodes[Edge.ToIID].NumPredecessors--; + std::swap(Edge, N.OutgoingEdges[E - 1]); + --E; + ++NumPruned; + } + + if (NumPruned) + N.OutgoingEdges.resize(Size - NumPruned); + } +} + +void DependencyGraph::initializeRootSet( + SmallVectorImpl<unsigned> &RootSet) const { + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &N = Nodes[I]; + if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) + RootSet.emplace_back(I); + } +} + +void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, + unsigned Iterations) { + SmallVector<unsigned, 8> ToVisit; + + // A critical sequence is computed as the longest path from a node of the + // RootSet to a leaf node (i.e. a node with no successors). The RootSet is + // composed of nodes with at least one successor, and no predecessors. + // + // Each node of the graph starts with an initial default cost of zero. The + // cost of a node is a measure of criticality: the higher the cost, the bigger + // is the performance impact. + // For register and memory dependencies, the cost is a function of the write + // latency as well as the actual delay (in cycles) caused to users. + // For processor resource dependencies, the cost is a function of the resource + // pressure. Resource interferences with low frequency values are ignored. + // + // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of + // the inner loop selects (i.e. visits) a node N from a set of `unvisited + // nodes`, and then propagates the cost of N to all its neighbors. + // + // The `unvisited nodes` set initially contains all the nodes from the + // RootSet. A node N is added to the `unvisited nodes` if all its + // predecessors have been visited already. + // + // For simplicity, every node tracks the number of unvisited incoming edges in + // field `NumVisitedPredecessors`. When the value of that field drops to + // zero, then the corresponding node is added to a `ToVisit` set. + // + // At the end of every iteration of the outer loop, set `ToVisit` becomes our + // new `unvisited nodes` set. + // + // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) + // is empty. This algorithm works under the assumption that the graph is + // acyclic. 
+ do { + for (unsigned IID : RootSet) { + const DGNode &N = Nodes[IID]; + for (const DependencyEdge &DepEdge : N.OutgoingEdges) { + unsigned ToIID = DepEdge.ToIID; + DGNode &To = Nodes[ToIID]; + uint64_t Cost = N.Cost + DepEdge.Dep.Cost; + // Check if this is the most expensive incoming edge seen so far. In + // case, update the total cost of the destination node (ToIID), as well + // its field `CriticalPredecessor`. + if (Cost > To.Cost) { + To.CriticalPredecessor = DepEdge; + To.Cost = Cost; + To.Depth = N.Depth + 1; + } + To.NumVisitedPredecessors++; + if (To.NumVisitedPredecessors == To.NumPredecessors) + ToVisit.emplace_back(ToIID); + } + } + + std::swap(RootSet, ToVisit); + ToVisit.clear(); + } while (!RootSet.empty()); +} + +void DependencyGraph::getCriticalSequence( + SmallVectorImpl<const DependencyEdge *> &Seq) const { + // At this stage, nodes of the graph have been already visited, and costs have + // been propagated through the edges (see method `propagateThroughEdges()`). + + // Identify the node N with the highest cost in the graph. By construction, + // that node is the last instruction of our critical sequence. + // Field N.Depth would tell us the total length of the sequence. + // + // To obtain the sequence of critical edges, we simply follow the chain of + // critical predecessors starting from node N (field + // DGNode::CriticalPredecessor). + const auto It = std::max_element( + Nodes.begin(), Nodes.end(), + [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); + unsigned IID = std::distance(Nodes.begin(), It); + Seq.resize(Nodes[IID].Depth); + for (const DependencyEdge *&DE : llvm::reverse(Seq)) { + const DGNode &N = Nodes[IID]; + DE = &N.CriticalPredecessor; + IID = N.CriticalPredecessor.FromIID; + } +} + +void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, + const MCInst &MCI, + bool UseDifferentColor) const { + FOS.PadToColumn(14); + if (UseDifferentColor) + FOS.changeColor(raw_ostream::CYAN, true, false); + FOS << printInstructionString(MCI); + if (UseDifferentColor) + FOS.resetColor(); +} + +void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + // Early exit if no bottlenecks were found during the simulation. + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + + SmallVector<const DependencyEdge *, 16> Seq; + DG.getCriticalSequence(Seq); + if (Seq.empty()) + return; + + OS << "\nCritical sequence based on the simulation:\n\n"; + + const DependencyEdge &FirstEdge = *Seq[0]; + ArrayRef<llvm::MCInst> Source = getSource(); + unsigned FromIID = FirstEdge.FromIID % Source.size(); + unsigned ToIID = FirstEdge.ToIID % Source.size(); + bool IsLoopCarried = FromIID >= ToIID; + + formatted_raw_ostream FOS(OS); + FOS.PadToColumn(14); + FOS << "Instruction"; + FOS.PadToColumn(58); + FOS << "Dependency Information"; + + bool HasColors = FOS.has_colors(); + + unsigned CurrentIID = 0; + if (IsLoopCarried) { + FOS << "\n +----< " << FromIID << "."; + printInstruction(FOS, Source[FromIID], HasColors); + FOS << "\n |\n | < loop carried > \n |"; + } else { + while (CurrentIID < FromIID) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, Source[CurrentIID]); + CurrentIID++; + } + + FOS << "\n +----< " << CurrentIID << "."; + printInstruction(FOS, Source[CurrentIID], HasColors); + CurrentIID++; + } + + for (const DependencyEdge *&DE : Seq) { + ToIID = DE->ToIID % Source.size(); + unsigned LastIID = CurrentIID > ToIID ? 
Source.size() : ToIID; + + while (CurrentIID < LastIID) { + FOS << "\n | " << CurrentIID << "."; + printInstruction(FOS, Source[CurrentIID]); + CurrentIID++; + } + + if (CurrentIID == ToIID) { + FOS << "\n +----> " << ToIID << "."; + printInstruction(FOS, Source[CurrentIID], HasColors); + } else { + FOS << "\n |\n | < loop carried > \n |" + << "\n +----> " << ToIID << "."; + printInstruction(FOS, Source[ToIID], HasColors); + } + FOS.PadToColumn(58); + + const DependencyEdge::Dependency &Dep = DE->Dep; + if (HasColors) + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + if (Dep.Type == DependencyEdge::DT_REGISTER) { + FOS << "## REGISTER dependency: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID); + } else if (Dep.Type == DependencyEdge::DT_MEMORY) { + FOS << "## MEMORY dependency."; + } else { + assert(Dep.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + FOS << "## RESOURCE interference: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); + if (HasColors) { + FOS.resetColor(); + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + } + FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) + << "% ]"; + } + if (HasColors) + FOS.resetColor(); + ++CurrentIID; + } + + while (CurrentIID < Source.size()) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, Source[CurrentIID]); + CurrentIID++; + } + + FOS << '\n'; + FOS.flush(); +} + +#ifndef NDEBUG +void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { + OS << "\nREG DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_REGISTER) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nMEM DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_MEMORY) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nRESOURCE DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) + dumpDependencyEdge(OS, DE, MCIP); +} +#endif // NDEBUG + +void DependencyGraph::addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&Dep) { + DGNode &NodeFrom = Nodes[From]; + DGNode &NodeTo = Nodes[To]; + SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; + + auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { + return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; + }); + + if (It != Vec.end()) { + It->Dep.Cost += Dep.Cost; + It->Frequency++; + return; + } + + DependencyEdge DE = {Dep, From, To, 1}; + Vec.emplace_back(DE); + NodeTo.NumPredecessors++; +} + +BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> S, unsigned NumIter) + : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), + DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), + PressureIncreasedBecauseOfResources(false), + PressureIncreasedBecauseOfRegisterDependencies(false), + PressureIncreasedBecauseOfMemoryDependencies(false), + SeenStallCycles(false), BPI() {} + +void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, + unsigned RegID, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = getSource().size(); + if (IsLoopCarried) { + DG.addRegisterDep(From, To + SourceSize, 
RegID, Cost); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); + return; + } + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); +} + +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, + unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = getSource().size(); + if (IsLoopCarried) { + DG.addMemoryDep(From, To + SourceSize, Cost); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); + return; + } + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); +} + +void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, + uint64_t Mask, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = getSource().size(); + if (IsLoopCarried) { + DG.addResourceDep(From, To + SourceSize, Mask, Cost); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); + return; + } + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); +} + +void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { + const unsigned IID = Event.IR.getSourceIndex(); + if (Event.Type == HWInstructionEvent::Dispatched) { + Tracker.onInstructionDispatched(IID); + return; + } + if (Event.Type == HWInstructionEvent::Executed) { + Tracker.onInstructionExecuted(IID); + return; + } + + if (Event.Type != HWInstructionEvent::Issued) + return; + + ArrayRef<llvm::MCInst> Source = getSource(); + const Instruction &IS = *Event.IR.getInstruction(); + unsigned To = IID % Source.size(); + + unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); + uint64_t ResourceMask = IS.getCriticalResourceMask(); + SmallVector<std::pair<unsigned, unsigned>, 4> Users; + while (ResourceMask) { + uint64_t Current = ResourceMask & (-ResourceMask); + Tracker.getResourceUsers(Current, Users); + for (const std::pair<unsigned, unsigned> &U : Users) + addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); + Users.clear(); + ResourceMask ^= Current; + } + + const CriticalDependency &RegDep = IS.getCriticalRegDep(); + if (RegDep.Cycles) { + Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); + unsigned From = RegDep.IID % Source.size(); + addRegisterDep(From, To, RegDep.RegID, Cycles); + } + + const CriticalDependency &MemDep = IS.getCriticalMemDep(); + if (MemDep.Cycles) { + Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); + unsigned From = MemDep.IID % Source.size(); + addMemoryDep(From, To, Cycles); + } + + Tracker.handleInstructionIssuedEvent( + static_cast<const HWInstructionIssuedEvent &>(Event)); + + // Check if this is the last simulated instruction. 
+ if (IID == ((Iterations * Source.size()) - 1)) + DG.finalizeGraph(Iterations); +} + +void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + Tracker.handlePressureEvent(Event); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: + PressureIncreasedBecauseOfResources = true; + break; + case HWPressureEvent::REGISTER_DEPS: + PressureIncreasedBecauseOfRegisterDependencies = true; + break; + case HWPressureEvent::MEMORY_DEPS: + PressureIncreasedBecauseOfMemoryDependencies = true; + break; + } +} + +void BottleneckAnalysis::onCycleEnd() { + ++TotalCycles; + + bool PressureIncreasedBecauseOfDataDependencies = + PressureIncreasedBecauseOfRegisterDependencies || + PressureIncreasedBecauseOfMemoryDependencies; + if (!PressureIncreasedBecauseOfResources && + !PressureIncreasedBecauseOfDataDependencies) + return; + + ++BPI.PressureIncreaseCycles; + if (PressureIncreasedBecauseOfRegisterDependencies) + ++BPI.RegisterDependencyCycles; + if (PressureIncreasedBecauseOfMemoryDependencies) + ++BPI.MemoryDependencyCycles; + if (PressureIncreasedBecauseOfDataDependencies) + ++BPI.DataDependencyCycles; + if (PressureIncreasedBecauseOfResources) + ++BPI.ResourcePressureCycles; + PressureIncreasedBecauseOfResources = false; + PressureIncreasedBecauseOfRegisterDependencies = false; + PressureIncreasedBecauseOfMemoryDependencies = false; +} + +void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { + OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; + return; + } + + double PressurePerCycle = + (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; + double ResourcePressurePerCycle = + (double)BPI.ResourcePressureCycles * 100 / TotalCycles; + double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; + double RegDepPressurePerCycle = + (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; + double MemDepPressurePerCycle = + (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; + + OS << "\n\nCycles with backend pressure increase [ " + << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; + + OS << "\nThroughput Bottlenecks: " + << "\n Resource Pressure [ " + << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + + if (BPI.PressureIncreaseCycles) { + ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); + const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); + for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { + unsigned ResourceCycles = Distribution[I]; + if (ResourceCycles) { + double Frequency = (double)ResourceCycles * 100 / TotalCycles; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); + OS << "\n - " << PRDesc.Name << " [ " + << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; + } + } + } + + OS << "\n Data Dependencies: [ " + << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; + OS << "\n - Register Dependencies [ " + << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + OS << "\n - Memory Dependencies [ " + << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]\n"; +} + +void BottleneckAnalysis::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + printBottleneckHints(TempStream); + TempStream.flush(); + OS << Buffer; + 
printCriticalSequence(OS); +} + +} // namespace mca. +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h new file mode 100644 index 0000000000..cd5af0afcf --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -0,0 +1,348 @@ +//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the bottleneck analysis view. +/// +/// This view internally observes backend pressure increase events in order to +/// identify problematic data dependencies and processor resource interferences. +/// +/// Example of bottleneck analysis report for a dot-product on X86 btver2: +/// +/// Cycles with backend pressure increase [ 40.76% ] +/// Throughput Bottlenecks: +/// Resource Pressure [ 39.34% ] +/// - JFPA [ 39.34% ] +/// - JFPU0 [ 39.34% ] +/// Data Dependencies: [ 1.42% ] +/// - Register Dependencies [ 1.42% ] +/// - Memory Dependencies [ 0.00% ] +/// +/// According to the example, backend pressure increased during the 40.76% of +/// the simulated cycles. In particular, the major cause of backend pressure +/// increases was the contention on floating point adder JFPA accessible from +/// pipeline resource JFPU0. +/// +/// At the end of each cycle, if pressure on the simulated out-of-order buffers +/// has increased, a backend pressure event is reported. +/// In particular, this occurs when there is a delta between the number of uOps +/// dispatched and the number of uOps issued to the underlying pipelines. +/// +/// The bottleneck analysis view is also responsible for identifying and +/// printing the most "critical" sequence of dependent instructions according to +/// the simulated run. +/// +/// Below is the critical sequence computed for the dot-product example on +/// btver2: +/// +/// Instruction Dependency Information +/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4 +/// | +/// | < loop carried > +/// | +/// | 0. vmulps %xmm0, %xmm0, %xmm2 +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3 +/// | +/// | < loop carried > +/// | +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +/// +/// The algorithm that computes the critical sequence is very similar to a +/// critical path analysis. +/// +/// A dependency graph is used internally to track dependencies between nodes. +/// Nodes of the graph represent instructions from the input assembly sequence, +/// and edges of the graph represent data dependencies or processor resource +/// interferences. +/// +/// Edges are dynamically 'discovered' by observing instruction state +/// transitions and backend pressure increase events. Edges are internally +/// ranked based on their "criticality". A dependency is considered to be +/// critical if it takes a long time to execute, and if it contributes to +/// backend pressure increases. 
Criticality is internally measured in terms of +/// cycles; it is computed for every edge in the graph as a function of the edge +/// latency and the number of backend pressure increase cycles contributed by +/// that edge. +/// +/// At the end of simulation, costs are propagated to nodes through the edges of +/// the graph, and the most expensive path connecting the root-set (a +/// set of nodes with no predecessors) to a leaf node is reported as critical +/// sequence. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H +#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H + +#include "Views/InstructionView.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +class PressureTracker { + const MCSchedModel &SM; + + // Resource pressure distribution. There is an element for every processor + // resource declared by the scheduling model. Quantities are number of cycles. + SmallVector<unsigned, 4> ResourcePressureDistribution; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + SmallVector<uint64_t, 4> ProcResID2Mask; + + // Maps processor resource state indices (returned by calls to + // `getResourceStateIndex(Mask)` to processor resource identifiers. + SmallVector<unsigned, 4> ResIdx2ProcResID; + + // Maps Processor Resource identifiers to ResourceUsers indices. + SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex; + + // Identifies the last user of a processor resource unit. + // This vector is updated on every instruction issued event. + // There is one entry for every processor resource unit declared by the + // processor model. An all_ones value is treated like an invalid instruction + // identifier. 
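// Illustrative sketch (standard C++ only, invented names, not the llvm-mca
// implementation): one plausible shape for the per-unit "last user" table
// described in the comment above, using an all-ones value as the sentinel
// for "no valid user yet".
#include <utility>
#include <vector>

struct LastUserTable {
  static constexpr unsigned Invalid = ~0u;             // all-ones == invalid IID
  std::vector<std::pair<unsigned, unsigned>> Users;    // <IID, cycle>, one per unit

  explicit LastUserTable(unsigned NumUnits)
      : Users(NumUnits, {Invalid, Invalid}) {}

  // Record that instruction IID grabbed this unit on the given cycle.
  void recordIssue(unsigned UnitID, unsigned IID, unsigned Cycle) {
    Users[UnitID] = {IID, Cycle};
  }

  bool hasValidUser(unsigned UnitID) const {
    return Users[UnitID].first != Invalid;
  }
};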
+ using User = std::pair<unsigned, unsigned>; + SmallVector<User, 4> ResourceUsers; + + struct InstructionPressureInfo { + unsigned RegisterPressureCycles; + unsigned MemoryPressureCycles; + unsigned ResourcePressureCycles; + }; + DenseMap<unsigned, InstructionPressureInfo> IPI; + + void updateResourcePressureDistribution(uint64_t CumulativeMask); + + User getResourceUser(unsigned ProcResID, unsigned UnitID) const { + unsigned Index = ProcResID2ResourceUsersIndex[ProcResID]; + return ResourceUsers[Index + UnitID]; + } + +public: + PressureTracker(const MCSchedModel &Model); + + ArrayRef<unsigned> getResourcePressureDistribution() const { + return ResourcePressureDistribution; + } + + void getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const; + + unsigned getRegisterPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.RegisterPressureCycles; + } + + unsigned getMemoryPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.MemoryPressureCycles; + } + + unsigned getResourcePressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.ResourcePressureCycles; + } + + const char *resolveResourceName(uint64_t ResourceMask) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + return PRDesc.Name; + } + + void onInstructionDispatched(unsigned IID); + void onInstructionExecuted(unsigned IID); + + void handlePressureEvent(const HWPressureEvent &Event); + void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); +}; + +// A dependency edge. +struct DependencyEdge { + enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE }; + + // Dependency edge descriptor. + // + // It specifies the dependency type, as well as the edge cost in cycles. + struct Dependency { + DependencyType Type; + uint64_t ResourceOrRegID; + uint64_t Cost; + }; + Dependency Dep; + + unsigned FromIID; + unsigned ToIID; + + // Used by the bottleneck analysis to compute the interference + // probability for processor resources. + unsigned Frequency; +}; + +// A dependency graph used by the bottleneck analysis to describe data +// dependencies and processor resource interferences between instructions. +// +// There is a node (an instance of struct DGNode) for every instruction in the +// input assembly sequence. Edges of the graph represent dependencies between +// instructions. +// +// Each edge of the graph is associated with a cost value which is used +// internally to rank dependency based on their impact on the runtime +// performance (see field DependencyEdge::Dependency::Cost). In general, the +// higher the cost of an edge, the higher the impact on performance. +// +// The cost of a dependency is a function of both the latency and the number of +// cycles where the dependency has been seen as critical (i.e. contributing to +// back-pressure increases). +// +// Loop carried dependencies are carefully expanded by the bottleneck analysis +// to guarantee that the graph stays acyclic. 
To this end, extra nodes are +// pre-allocated at construction time to describe instructions from "past and +// future" iterations. The graph is kept acyclic mainly because it simplifies +// the complexity of the algorithm that computes the critical sequence. +class DependencyGraph { + struct DGNode { + unsigned NumPredecessors; + unsigned NumVisitedPredecessors; + uint64_t Cost; + unsigned Depth; + + DependencyEdge CriticalPredecessor; + SmallVector<DependencyEdge, 8> OutgoingEdges; + }; + SmallVector<DGNode, 16> Nodes; + + DependencyGraph(const DependencyGraph &) = delete; + DependencyGraph &operator=(const DependencyGraph &) = delete; + + void addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&DE); + + void pruneEdges(unsigned Iterations); + void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const; + void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, + unsigned Iterations); + +#ifndef NDEBUG + void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, + MCInstPrinter &MCIP) const; +#endif + +public: + DependencyGraph(unsigned Size) : Nodes(Size) {} + + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost}); + } + + void addMemoryDep(unsigned From, unsigned To, unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost}); + } + + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost}); + } + + // Called by the bottleneck analysis at the end of simulation to propagate + // costs through the edges of the graph, and compute a critical path. + void finalizeGraph(unsigned Iterations) { + SmallVector<unsigned, 16> RootSet; + pruneEdges(Iterations); + initializeRootSet(RootSet); + propagateThroughEdges(RootSet, Iterations); + } + + // Returns a sequence of edges representing the critical sequence based on the + // simulated run. It assumes that the graph has already been finalized (i.e. + // method `finalizeGraph()` has already been called on this graph). + void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const; + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const; +#endif +}; + +/// A view that collects and prints a few performance numbers. +class BottleneckAnalysis : public InstructionView { + PressureTracker Tracker; + DependencyGraph DG; + + unsigned Iterations; + unsigned TotalCycles; + + bool PressureIncreasedBecauseOfResources; + bool PressureIncreasedBecauseOfRegisterDependencies; + bool PressureIncreasedBecauseOfMemoryDependencies; + // True if throughput was affected by dispatch stalls. + bool SeenStallCycles; + + struct BackPressureInfo { + // Cycles where backpressure increased. + unsigned PressureIncreaseCycles; + // Cycles where backpressure increased because of pipeline pressure. + unsigned ResourcePressureCycles; + // Cycles where backpressure increased because of data dependencies. + unsigned DataDependencyCycles; + // Cycles where backpressure increased because of register dependencies. + unsigned RegisterDependencyCycles; + // Cycles where backpressure increased because of memory dependencies. + unsigned MemoryDependencyCycles; + }; + BackPressureInfo BPI; + + // Used to populate the dependency graph DG. 
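// The helpers declared next feed edges into DG. As a complement, here is a
// minimal, self-contained sketch (standard C++ only; node/edge layout and
// names are assumptions, not the real finalizeGraph()) of the idea behind
// cost propagation: walk the acyclic graph in topological order, keep the
// most expensive incoming path per node, and the critical sequence is the
// predecessor chain of the most expensive leaf.
#include <cstdint>
#include <queue>
#include <vector>

struct SketchEdge { unsigned To; uint64_t Cost; };

// Returns, for every node, the cost of the most expensive path reaching it.
// Pred records the predecessor on that path so the sequence can be rebuilt.
static std::vector<uint64_t>
propagateCosts(const std::vector<std::vector<SketchEdge>> &Adj,
               std::vector<int> &Pred) {
  const size_t N = Adj.size();
  std::vector<unsigned> InDegree(N, 0);
  for (const auto &Edges : Adj)
    for (const SketchEdge &E : Edges)
      ++InDegree[E.To];

  std::vector<uint64_t> Cost(N, 0);
  Pred.assign(N, -1);
  std::queue<unsigned> Ready;            // the "root set": nodes with no predecessors
  for (unsigned I = 0; I < N; ++I)
    if (InDegree[I] == 0)
      Ready.push(I);

  while (!Ready.empty()) {
    unsigned U = Ready.front();
    Ready.pop();
    for (const SketchEdge &E : Adj[U]) {
      if (Cost[U] + E.Cost > Cost[E.To]) {   // keep the most expensive path
        Cost[E.To] = Cost[U] + E.Cost;
        Pred[E.To] = static_cast<int>(U);
      }
      if (--InDegree[E.To] == 0)
        Ready.push(E.To);
    }
  }
  return Cost;
}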
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); + void addMemoryDep(unsigned From, unsigned To, unsigned Cy); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + + void printInstruction(formatted_raw_ostream &FOS, const MCInst &MCI, + bool UseDifferentColor = false) const; + + // Prints a bottleneck message to OS. + void printBottleneckHints(raw_ostream &OS) const; + void printCriticalSequence(raw_ostream &OS) const; + +public: + BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + ArrayRef<MCInst> Sequence, unsigned Iterations); + + void onCycleEnd() override; + void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } + void onEvent(const HWPressureEvent &Event) override; + void onEvent(const HWInstructionEvent &Event) override; + + void printView(raw_ostream &OS) const override; + StringRef getNameAsString() const override { return "BottleneckAnalysis"; } + bool isSerializable() const override { return false; } + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp new file mode 100644 index 0000000000..3dc17c8754 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -0,0 +1,98 @@ +//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the DispatchStatistics interface. 
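// The view implemented in this file, like every view in the tool, is driven
// by per-cycle callbacks. A stripped-down sketch of that observer pattern
// (standard C++ only; names are invented for illustration):
#include <vector>

struct EventListenerSketch {
  virtual ~EventListenerSketch() = default;
  virtual void onCycleBegin() {}
  virtual void onCycleEnd() {}
};

struct DriverSketch {
  std::vector<EventListenerSketch *> Listeners;

  void addListener(EventListenerSketch *L) { Listeners.push_back(L); }

  // Notify every registered listener once per simulated cycle.
  void runCycle() {
    for (EventListenerSketch *L : Listeners)
      L->onCycleBegin();
    // ... simulate one cycle ...
    for (EventListenerSketch *L : Listeners)
      L->onCycleEnd();
  }
};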
+/// +//===----------------------------------------------------------------------===// + +#include "Views/DispatchStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +void DispatchStatistics::onEvent(const HWStallEvent &Event) { + if (Event.Type < HWStallEvent::LastGenericEvent) + HWStalls[Event.Type]++; +} + +void DispatchStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type != HWInstructionEvent::Dispatched) + return; + + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + NumDispatched += DE.MicroOpcodes; +} + +void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nDispatch Logic - " + << "number of cycles where we saw N micro opcodes dispatched:\n"; + TempStream << "[# dispatched], [# cycles]\n"; + for (const std::pair<const unsigned, unsigned> &Entry : + DispatchGroupSizePerCycle) { + double Percentage = ((double)Entry.second / NumCycles) * 100.0; + TempStream << " " << Entry.first << ", " << Entry.second + << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +static void printStalls(raw_ostream &OS, unsigned NumStalls, + unsigned NumCycles) { + if (!NumStalls) { + OS << NumStalls; + return; + } + + double Percentage = ((double)NumStalls / NumCycles) * 100.0; + OS << NumStalls << " (" + << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)"; +} + +void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream SS(Buffer); + SS << "\n\nDynamic Dispatch Stall Cycles:\n"; + SS << "RAT - Register unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles); + SS << "\nRCU - Retire tokens unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles); + SS << "\nSCHEDQ - Scheduler full: "; + printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles); + SS << "\nLQ - Load queue full: "; + printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles); + SS << "\nSQ - Store queue full: "; + printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles); + SS << "\nGROUP - Static restrictions on the dispatch group: "; + printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles); + SS << "\nUSH - Uncategorised Structural Hazard: "; + printStalls(SS, HWStalls[HWStallEvent::CustomBehaviourStall], NumCycles); + SS << '\n'; + SS.flush(); + OS << Buffer; +} + +json::Value DispatchStatistics::toJSON() const { + json::Object JO({{"RAT", HWStalls[HWStallEvent::RegisterFileStall]}, + {"RCU", HWStalls[HWStallEvent::RetireControlUnitStall]}, + {"SCHEDQ", HWStalls[HWStallEvent::SchedulerQueueFull]}, + {"LQ", HWStalls[HWStallEvent::LoadQueueFull]}, + {"SQ", HWStalls[HWStallEvent::StoreQueueFull]}, + {"GROUP", HWStalls[HWStallEvent::DispatchGroupStall]}, + {"USH", HWStalls[HWStallEvent::CustomBehaviourStall]}}); + return JO; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h new file mode 100644 index 0000000000..cfd12691c0 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/DispatchStatistics.h @@ -0,0 +1,87 @@ +//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a view that prints a few statistics related to the +/// dispatch logic. It collects and analyzes instruction dispatch events as +/// well as static/dynamic dispatch stall events. +/// +/// Example: +/// ======== +/// +/// Dynamic Dispatch Stall Cycles: +/// RAT - Register unavailable: 0 +/// RCU - Retire tokens unavailable: 0 +/// SCHEDQ - Scheduler full: 42 +/// LQ - Load queue full: 0 +/// SQ - Store queue full: 0 +/// GROUP - Static restrictions on the dispatch group: 0 +/// +/// +/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: +/// [# dispatched], [# cycles] +/// 0, 15 (11.5%) +/// 2, 4 (3.1%) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/View.h" +#include <map> + +namespace llvm { +namespace mca { + +class DispatchStatistics : public View { + unsigned NumDispatched; + unsigned NumCycles; + + // Counts dispatch stall events caused by unavailability of resources. There + // is one counter for every generic stall kind (see class HWStallEvent). + llvm::SmallVector<unsigned, 8> HWStalls; + + using Histogram = std::map<unsigned, unsigned>; + Histogram DispatchGroupSizePerCycle; + + void updateHistograms() { + DispatchGroupSizePerCycle[NumDispatched]++; + NumDispatched = 0; + } + + void printDispatchHistogram(llvm::raw_ostream &OS) const; + + void printDispatchStalls(llvm::raw_ostream &OS) const; + +public: + DispatchStatistics() + : NumDispatched(0), NumCycles(0), + HWStalls(HWStallEvent::LastGenericEvent) {} + + void onEvent(const HWStallEvent &Event) override; + + void onEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin() override { NumCycles++; } + + void onCycleEnd() override { updateHistograms(); } + + void printView(llvm::raw_ostream &OS) const override { + printDispatchStalls(OS); + printDispatchHistogram(OS); + } + StringRef getNameAsString() const override { return "DispatchStatistics"; } + json::Value toJSON() const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp new file mode 100644 index 0000000000..257fdca8cb --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -0,0 +1,177 @@ +//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstructionInfoView API. 
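// A small sketch (standard C++ only, hypothetical names) of the histogram
// pattern used by the DispatchStatistics view above: count how many uOps were
// dispatched in each cycle, then report each bucket as a percentage rounded to
// one decimal with the same floor(x * 10 + 0.5) / 10 idiom the views use.
#include <cmath>
#include <cstdio>
#include <map>

struct DispatchHistogramSketch {
  std::map<unsigned, unsigned> CyclesPerWidth; // dispatched uOps -> #cycles
  unsigned DispatchedThisCycle = 0;
  unsigned NumCycles = 0;

  void onDispatch(unsigned MicroOps) { DispatchedThisCycle += MicroOps; }

  void onCycleEnd() {
    ++CyclesPerWidth[DispatchedThisCycle];
    DispatchedThisCycle = 0;
    ++NumCycles;
  }

  void print() const {
    std::printf("[# dispatched], [# cycles]\n");
    for (const auto &Entry : CyclesPerWidth) {
      double Pct = (double)Entry.second / NumCycles * 100.0;
      // Round to one decimal digit before printing.
      std::printf(" %u, %u (%.1f%%)\n", Entry.first, Entry.second,
                  std::floor(Pct * 10 + 0.5) / 10);
    }
  }
};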
+/// +//===----------------------------------------------------------------------===// + +#include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/JSON.h" + +namespace llvm { +namespace mca { + +void InstructionInfoView::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + + ArrayRef<llvm::MCInst> Source = getSource(); + if (!Source.size()) + return; + + IIVDVec IIVD(Source.size()); + collectData(IIVD); + + TempStream << "\n\nInstruction Info:\n"; + TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintBarriers) { + TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n"; + } + if (PrintEncodings) { + if (PrintBarriers) { + TempStream << "[9]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] " + << "[9] Encodings: Instructions:\n"; + } else { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } + } else { + if (PrintBarriers) { + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] " + << "Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] " + << "Instructions:\n"; + } + } + + int Index = 0; + for (const auto &I : enumerate(zip(IIVD, Source))) { + const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value()); + + TempStream << ' ' << IIVDEntry.NumMicroOpcodes << " "; + if (IIVDEntry.NumMicroOpcodes < 10) + TempStream << " "; + else if (IIVDEntry.NumMicroOpcodes < 100) + TempStream << ' '; + TempStream << IIVDEntry.Latency << " "; + if (IIVDEntry.Latency < 10) + TempStream << " "; + else if (IIVDEntry.Latency < 100) + TempStream << ' '; + + if (IIVDEntry.RThroughput) { + double RT = *IIVDEntry.RThroughput; + TempStream << format("%.2f", RT) << ' '; + if (RT < 10.0) + TempStream << " "; + else if (RT < 100.0) + TempStream << ' '; + } else { + TempStream << " - "; + } + TempStream << (IIVDEntry.mayLoad ? " * " : " "); + TempStream << (IIVDEntry.mayStore ? " * " : " "); + TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U " : " "); + + if (PrintBarriers) { + TempStream << (LoweredInsts[Index]->isALoadBarrier() ? " * " + : " "); + TempStream << (LoweredInsts[Index]->isAStoreBarrier() ? " * " + : " "); + } + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I.index())); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } + + const MCInst &Inst = std::get<1>(I.value()); + TempStream << printInstructionString(Inst) << '\n'; + ++Index; + } + + TempStream.flush(); + OS << Buffer; +} + +void InstructionInfoView::collectData( + MutableArrayRef<InstructionInfoViewData> IIVD) const { + const llvm::MCSubtargetInfo &STI = getSubTargetInfo(); + const MCSchedModel &SM = STI.getSchedModel(); + for (const auto I : zip(getSource(), IIVD)) { + const MCInst &Inst = std::get<0>(I); + InstructionInfoViewData &IIVDEntry = std::get<1>(I); + const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); + + // Obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + unsigned CPUID = SM.getProcessorID(); + + // Try to solve variant scheduling classes. 
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = + STI.resolveVariantSchedClass(SchedClassID, &Inst, &MCII, CPUID); + + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + IIVDEntry.NumMicroOpcodes = SCDesc.NumMicroOps; + IIVDEntry.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // Add extra latency due to delays in the forwarding data paths. + IIVDEntry.Latency += MCSchedModel::getForwardingDelayCycles( + STI.getReadAdvanceEntries(SCDesc)); + IIVDEntry.RThroughput = MCSchedModel::getReciprocalThroughput(STI, SCDesc); + IIVDEntry.mayLoad = MCDesc.mayLoad(); + IIVDEntry.mayStore = MCDesc.mayStore(); + IIVDEntry.hasUnmodeledSideEffects = MCDesc.hasUnmodeledSideEffects(); + } +} + +// Construct a JSON object from a single InstructionInfoViewData object. +json::Object +InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const { + json::Object JO({{"NumMicroOpcodes", IIVD.NumMicroOpcodes}, + {"Latency", IIVD.Latency}, + {"mayLoad", IIVD.mayLoad}, + {"mayStore", IIVD.mayStore}, + {"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}}); + JO.try_emplace("RThroughput", IIVD.RThroughput.value_or(0.0)); + return JO; +} + +json::Value InstructionInfoView::toJSON() const { + ArrayRef<llvm::MCInst> Source = getSource(); + if (!Source.size()) + return json::Value(0); + + IIVDVec IIVD(Source.size()); + collectData(IIVD); + + json::Array InstInfo; + for (const auto &I : enumerate(IIVD)) { + const InstructionInfoViewData &IIVDEntry = I.value(); + json::Object JO = toJSON(IIVDEntry); + JO.try_emplace("Instruction", (unsigned)I.index()); + InstInfo.push_back(std::move(JO)); + } + return json::Object({{"InstructionList", json::Value(std::move(InstInfo))}}); +} +} // namespace mca. +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h new file mode 100644 index 0000000000..bddd01a086 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionInfoView.h @@ -0,0 +1,93 @@ +//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the instruction info view. +/// +/// The goal fo the instruction info view is to print the latency and reciprocal +/// throughput information for every instruction in the input sequence. +/// This section also reports extra information related to the number of micro +/// opcodes, and opcode properties (i.e. 
'MayLoad', 'MayStore', 'HasSideEffects) +/// +/// Example: +/// +/// Instruction Info: +/// [1]: #uOps +/// [2]: Latency +/// [3]: RThroughput +/// [4]: MayLoad +/// [5]: MayStore +/// [6]: HasSideEffects +/// +/// [1] [2] [3] [4] [5] [6] Instructions: +/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 +/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 +/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H + +#include "Views/InstructionView.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +/// A view that prints out generic instruction information. +class InstructionInfoView : public InstructionView { + const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; + bool PrintBarriers; + using UniqueInst = std::unique_ptr<Instruction>; + ArrayRef<UniqueInst> LoweredInsts; + + struct InstructionInfoViewData { + unsigned NumMicroOpcodes = 0; + unsigned Latency = 0; + std::optional<double> RThroughput = 0.0; + bool mayLoad = false; + bool mayStore = false; + bool hasUnmodeledSideEffects = false; + }; + using IIVDVec = SmallVector<InstructionInfoViewData, 16>; + + /// Place the data into the array of InstructionInfoViewData IIVD. + void collectData(MutableArrayRef<InstructionInfoViewData> IIVD) const; + +public: + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S, + llvm::MCInstPrinter &IP, + ArrayRef<UniqueInst> LoweredInsts, + bool ShouldPrintBarriers) + : InstructionView(ST, IP, S), MCII(II), CE(C), + PrintEncodings(ShouldPrintEncodings), + PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts) {} + + void printView(llvm::raw_ostream &OS) const override; + StringRef getNameAsString() const override { return "InstructionInfoView"; } + json::Value toJSON() const override; + json::Object toJSON(const InstructionInfoViewData &IIVD) const; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp new file mode 100644 index 0000000000..3b174a0649 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.cpp @@ -0,0 +1,43 @@ +//===----------------------- InstructionView.cpp ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the member functions of the class InstructionView. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/InstructionView.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +namespace mca { + +InstructionView::~InstructionView() = default; + +StringRef +InstructionView::printInstructionString(const llvm::MCInst &MCI) const { + InstructionString = ""; + MCIP.printInst(&MCI, 0, "", STI, InstrStream); + InstrStream.flush(); + // Remove any tabs or spaces at the beginning of the instruction. + return StringRef(InstructionString).ltrim(); +} + +json::Value InstructionView::toJSON() const { + json::Array SourceInfo; + for (const auto &MCI : getSource()) { + StringRef Instruction = printInstructionString(MCI); + SourceInfo.push_back(Instruction.str()); + } + return SourceInfo; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h new file mode 100644 index 0000000000..ae57246fc3 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/InstructionView.h @@ -0,0 +1,60 @@ +//===----------------------- InstructionView.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for Views that examine and reference +/// a sequence of machine instructions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H + +#include "llvm/MCA/View.h" +#include "llvm/Support/JSON.h" + +namespace llvm { +class MCInstPrinter; + +namespace mca { + +// The base class for views that deal with individual machine instructions. +class InstructionView : public View { + const llvm::MCSubtargetInfo &STI; + llvm::MCInstPrinter &MCIP; + llvm::ArrayRef<llvm::MCInst> Source; + + mutable std::string InstructionString; + mutable raw_string_ostream InstrStream; + +public: + void printView(llvm::raw_ostream &) const override {} + InstructionView(const llvm::MCSubtargetInfo &STI, + llvm::MCInstPrinter &Printer, llvm::ArrayRef<llvm::MCInst> S) + : STI(STI), MCIP(Printer), Source(S), InstrStream(InstructionString) {} + + virtual ~InstructionView(); + + StringRef getNameAsString() const override { return "Instructions"; } + + // Return a reference to a string representing a given machine instruction. + // The result should be used or copied before the next call to + // printInstructionString() as it will overwrite the previous result. 
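// A minimal sketch (standard C++, hypothetical names) of the buffer-reuse
// contract documented above for the method declared next: a single internal
// string is reused, so the previous result is invalidated by the next call
// and must be copied if it has to outlive it.
#include <string>
#include <string_view>

class ReusedPrinterSketch {
  mutable std::string Buffer; // shared scratch buffer, overwritten on each call
public:
  std::string_view print(int Opcode) const {
    Buffer = "inst#" + std::to_string(Opcode);
    return Buffer; // valid only until the next call to print()
  }
};

// Usage: copy the result before printing the next instruction.
//   ReusedPrinterSketch P;
//   std::string First(P.print(0));        // deep copy, survives later calls
//   std::string_view Second = P.print(1); // 'First' is still usable; a mere
//                                          // view of the old result would not be.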
+ StringRef printInstructionString(const llvm::MCInst &MCI) const; + const llvm::MCSubtargetInfo &getSubTargetInfo() const { return STI; } + + llvm::MCInstPrinter &getInstPrinter() const { return MCIP; } + llvm::ArrayRef<llvm::MCInst> getSource() const { return Source; } + + json::Value toJSON() const override; +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp new file mode 100644 index 0000000000..4ef8053bff --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -0,0 +1,170 @@ +//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RegisterFileStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/RegisterFileStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) + : STI(sti) { + const MCSchedModel &SM = STI.getSchedModel(); + RegisterFileUsage RFUEmpty = {0, 0, 0}; + MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0}; + if (!SM.hasExtraProcessorInfo()) { + // Assume a single register file. + PRFUsage.emplace_back(RFUEmpty); + MoveElimInfo.emplace_back(MEIEmpty); + return; + } + + // Initialize a RegisterFileUsage for every user defined register file, plus + // the default register file which is always at index #0. + const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo(); + // There is always an "InvalidRegisterFile" entry in tablegen. That entry can + // be skipped. If there are no user defined register files, then reserve a + // single entry for the default register file at index #0. 
+ unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U); + + PRFUsage.resize(NumRegFiles); + std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty); + + MoveElimInfo.resize(NumRegFiles); + std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty); +} + +void RegisterFileStatistics::updateRegisterFileUsage( + ArrayRef<unsigned> UsedPhysRegs) { + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) { + RegisterFileUsage &RFU = PRFUsage[I]; + unsigned NumUsedPhysRegs = UsedPhysRegs[I]; + RFU.CurrentlyUsedMappings += NumUsedPhysRegs; + RFU.TotalMappings += NumUsedPhysRegs; + RFU.MaxUsedMappings = + std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); + } +} + +void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) { + if (!Inst.isOptimizableMove()) + return; + + if (Inst.getDefs().size() != Inst.getUses().size()) + return; + + for (size_t I = 0, E = Inst.getDefs().size(); I < E; ++I) { + const WriteState &WS = Inst.getDefs()[I]; + const ReadState &RS = Inst.getUses()[E - (I + 1)]; + + MoveEliminationInfo &Info = + MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()]; + Info.TotalMoveEliminationCandidates++; + if (WS.isEliminated()) + Info.CurrentMovesEliminated++; + if (WS.isWriteZero() && RS.isReadZero()) + Info.TotalMovesThatPropagateZero++; + } +} + +void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { + switch (Event.Type) { + default: + break; + case HWInstructionEvent::Retired: { + const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event); + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) + PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; + break; + } + case HWInstructionEvent::Dispatched: { + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + updateRegisterFileUsage(DE.UsedPhysRegs); + updateMoveElimInfo(*DE.IR.getInstruction()); + } + } +} + +void RegisterFileStatistics::onCycleEnd() { + for (MoveEliminationInfo &MEI : MoveElimInfo) { + unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle; + CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated); + MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated; + MEI.CurrentMovesEliminated = 0; + } +} + +void RegisterFileStatistics::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + + TempStream << "\n\nRegister File statistics:"; + const RegisterFileUsage &GlobalUsage = PRFUsage[0]; + TempStream << "\nTotal number of mappings created: " + << GlobalUsage.TotalMappings; + TempStream << "\nMax number of mappings used: " + << GlobalUsage.MaxUsedMappings << '\n'; + + for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) { + const RegisterFileUsage &RFU = PRFUsage[I]; + // Obtain the register file descriptor from the scheduling model. + assert(STI.getSchedModel().hasExtraProcessorInfo() && + "Unable to find register file info!"); + const MCExtraProcessorInfo &PI = + STI.getSchedModel().getExtraProcessorInfo(); + assert(I <= PI.NumRegisterFiles && "Unexpected register file index!"); + const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I]; + // Skip invalid register files. 
+ if (!RFDesc.NumPhysRegs) + continue; + + TempStream << "\n* Register File #" << I; + TempStream << " -- " << StringRef(RFDesc.Name) << ':'; + TempStream << "\n Number of physical registers: "; + if (!RFDesc.NumPhysRegs) + TempStream << "unbounded"; + else + TempStream << RFDesc.NumPhysRegs; + TempStream << "\n Total number of mappings created: " + << RFU.TotalMappings; + TempStream << "\n Max number of mappings used: " + << RFU.MaxUsedMappings << '\n'; + const MoveEliminationInfo &MEI = MoveElimInfo[I]; + + if (MEI.TotalMoveEliminationCandidates) { + TempStream << " Number of optimizable moves: " + << MEI.TotalMoveEliminationCandidates; + double EliminatedMovProportion = (double)MEI.TotalMovesEliminated / + MEI.TotalMoveEliminationCandidates * + 100.0; + double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero / + MEI.TotalMoveEliminationCandidates * 100.0; + TempStream << "\n Number of moves eliminated: " + << MEI.TotalMovesEliminated << " " + << format("(%.1f%%)", + floor((EliminatedMovProportion * 10) + 0.5) / 10); + TempStream << "\n Number of zero moves: " + << MEI.TotalMovesThatPropagateZero << " " + << format("(%.1f%%)", + floor((ZeroMovProportion * 10) + 0.5) / 10); + TempStream << "\n Max moves eliminated per cycle: " + << MEI.MaxMovesEliminatedPerCycle << '\n'; + } + } + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h new file mode 100644 index 0000000000..3de2a22ac3 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -0,0 +1,84 @@ +//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This view collects and prints register file usage statistics. +/// +/// Example (-mcpu=btver2): +/// ======================== +/// +/// Register File statistics: +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +/// +/// * Register File #1 -- FpuPRF: +/// Number of physical registers: 72 +/// Total number of mappings created: 0 +/// Max number of mappings used: 0 +/// Number of optimizable moves: 200 +/// Number of moves eliminated: 200 (100.0%) +/// Number of zero moves: 200 (100.0%) +/// Max moves eliminated per cycle: 2 +/// +/// * Register File #2 -- IntegerPRF: +/// Number of physical registers: 64 +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/View.h" + +namespace llvm { +namespace mca { + +class RegisterFileStatistics : public View { + const llvm::MCSubtargetInfo &STI; + + // Used to track the number of physical registers used in a register file. 
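// A compact sketch (standard C++ only; the field names mirror the
// RegisterFileUsage struct declared below, but the update logic is an
// assumption for illustration): mappings are acquired at dispatch and
// released at retire, and the running maximum records peak pressure.
struct RegFileUsageSketch {
  unsigned TotalMappings = 0;         // mappings ever created
  unsigned MaxUsedMappings = 0;       // peak simultaneous mappings
  unsigned CurrentlyUsedMappings = 0;

  void onDispatch(unsigned NewMappings) {
    CurrentlyUsedMappings += NewMappings;
    TotalMappings += NewMappings;
    if (CurrentlyUsedMappings > MaxUsedMappings)
      MaxUsedMappings = CurrentlyUsedMappings;
  }

  void onRetire(unsigned FreedMappings) {
    CurrentlyUsedMappings -= FreedMappings;
  }
};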
+ struct RegisterFileUsage { + unsigned TotalMappings; + unsigned MaxUsedMappings; + unsigned CurrentlyUsedMappings; + }; + + struct MoveEliminationInfo { + unsigned TotalMoveEliminationCandidates; + unsigned TotalMovesEliminated; + unsigned TotalMovesThatPropagateZero; + unsigned MaxMovesEliminatedPerCycle; + unsigned CurrentMovesEliminated; + }; + + // There is one entry for each register file implemented by the processor. + llvm::SmallVector<RegisterFileUsage, 4> PRFUsage; + llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo; + + void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs); + void updateMoveElimInfo(const Instruction &Inst); + +public: + RegisterFileStatistics(const llvm::MCSubtargetInfo &sti); + + void onCycleEnd() override; + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; + StringRef getNameAsString() const override { + return "RegisterFileStatistics"; + } + bool isSerializable() const override { return false; } +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp new file mode 100644 index 0000000000..77b3ba0b7c --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.cpp @@ -0,0 +1,200 @@ +//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements methods in the ResourcePressureView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/ResourcePressureView.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> S) + : InstructionView(sti, Printer, S), LastInstructionIdx(0) { + // Populate the map of resource descriptors. + unsigned R2VIndex = 0; + const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + Resource2VecIndex.insert(std::pair<unsigned, unsigned>(I, R2VIndex)); + R2VIndex += ProcResource.NumUnits; + } + + NumResourceUnits = R2VIndex; + ResourceUsage.resize(NumResourceUnits * (getSource().size() + 1)); + std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0); +} + +void ResourcePressureView::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) { + LastInstructionIdx = Event.IR.getSourceIndex(); + return; + } + + // We're only interested in Issue events. 
+ if (Event.Type != HWInstructionEvent::Issued) + return; + + const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event); + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size(); + for (const std::pair<ResourceRef, ResourceCycles> &Use : + IssueEvent.UsedResources) { + const ResourceRef &RR = Use.first; + assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end()); + unsigned R2VIndex = Resource2VecIndex[RR.first]; + R2VIndex += countTrailingZeros(RR.second); + ResourceUsage[R2VIndex + NumResourceUnits * SourceIdx] += Use.second; + ResourceUsage[R2VIndex + NumResourceUnits * Source.size()] += Use.second; + } +} + +static void printColumnNames(formatted_raw_ostream &OS, + const MCSchedModel &SM) { + unsigned Column = OS.getColumn(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + Column += 7; + OS << "[" << ResourceIndex; + if (NumUnits > 1) + OS << '.' << J; + OS << ']'; + OS.PadToColumn(Column); + } + + ResourceIndex++; + } +} + +static void printResourcePressure(formatted_raw_ostream &OS, double Pressure, + unsigned Col) { + if (!Pressure || Pressure < 0.005) { + OS << " - "; + } else { + // Round to the value to the nearest hundredth and then print it. + OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100); + } + OS.PadToColumn(Col); +} + +void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResources:\n"; + const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + FOS << '[' << ResourceIndex; + if (NumUnits > 1) + FOS << '.' 
<< J; + FOS << ']'; + FOS.PadToColumn(6); + FOS << "- " << ProcResource.Name << '\n'; + } + + ResourceIndex++; + } + + FOS << "\n\nResource pressure per iteration:\n"; + FOS.flush(); + printColumnNames(FOS, SM); + FOS << '\n'; + FOS.flush(); + + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned Executions = LastInstructionIdx / Source.size() + 1; + for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) { + double Usage = ResourceUsage[I + Source.size() * E]; + printResourcePressure(FOS, Usage / Executions, (I + 1) * 7); + } + + FOS.flush(); + OS << Buffer; +} + +void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResource pressure by instruction:\n"; + printColumnNames(FOS, getSubTargetInfo().getSchedModel()); + FOS << "Instructions:\n"; + + unsigned InstrIndex = 0; + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned Executions = LastInstructionIdx / Source.size() + 1; + for (const MCInst &MCI : Source) { + unsigned BaseEltIdx = InstrIndex * NumResourceUnits; + for (unsigned J = 0; J < NumResourceUnits; ++J) { + double Usage = ResourceUsage[J + BaseEltIdx]; + printResourcePressure(FOS, Usage / Executions, (J + 1) * 7); + } + + FOS << printInstructionString(MCI) << '\n'; + FOS.flush(); + OS << Buffer; + Buffer = ""; + + ++InstrIndex; + } +} + +json::Value ResourcePressureView::toJSON() const { + // We're dumping the instructions and the ResourceUsage array. + json::Array ResourcePressureInfo; + + // The ResourceUsage matrix is sparse, so we only consider + // non-zero values. + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned Executions = LastInstructionIdx / Source.size() + 1; + for (const auto &R : enumerate(ResourceUsage)) { + const ResourceCycles &RU = R.value(); + if (RU.getNumerator() == 0) + continue; + unsigned InstructionIndex = R.index() / NumResourceUnits; + unsigned ResourceIndex = R.index() % NumResourceUnits; + double Usage = RU / Executions; + ResourcePressureInfo.push_back( + json::Object({{"InstructionIndex", InstructionIndex}, + {"ResourceIndex", ResourceIndex}, + {"ResourceUsage", Usage}})); + } + + json::Object JO({{"ResourcePressureInfo", std::move(ResourcePressureInfo)}}); + return JO; +} +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h new file mode 100644 index 0000000000..c3993a08c1 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/ResourcePressureView.h @@ -0,0 +1,103 @@ +//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file define class ResourcePressureView. +/// Class ResourcePressureView observes hardware events generated by +/// the Pipeline object and collects statistics related to resource usage at +/// instruction granularity. 
+/// Resource pressure information is then printed out to a stream in the +/// form of a table like the one from the example below: +/// +/// Resources: +/// [0] - JALU0 +/// [1] - JALU1 +/// [2] - JDiv +/// [3] - JFPM +/// [4] - JFPU0 +/// [5] - JFPU1 +/// [6] - JLAGU +/// [7] - JSAGU +/// [8] - JSTC +/// [9] - JVIMUL +/// +/// Resource pressure per iteration: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00 +/// +/// Resource pressure by instruction: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0, +/// %xmm1 +/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1, +/// %xmm2 +/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3 +/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3, +/// %xmm4 +/// +/// In this example, we have AVX code executed on AMD Jaguar (btver2). +/// Both shuffles and vector floating point add operations on XMM registers have +/// a reciprocal throughput of 1cy. +/// Each add is issued to pipeline JFPU0, while each shuffle is issued to +/// pipeline JFPU1. The overall pressure per iteration is reported by two +/// tables: the first smaller table is the resource pressure per iteration; +/// the second table reports resource pressure per instruction. Values are the +/// average resource cycles consumed by an instruction. +/// Every vector add from the example uses resource JFPU0 for an average of 1cy +/// per iteration. Consequently, the resource pressure on JFPU0 is of 2cy per +/// iteration. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H +#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H + +#include "Views/InstructionView.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/JSON.h" + +namespace llvm { +namespace mca { + +/// This class collects resource pressure statistics and it is able to print +/// out all the collected information as a table to an output stream. +class ResourcePressureView : public InstructionView { + unsigned LastInstructionIdx; + + // Map to quickly obtain the ResourceUsage column index from a processor + // resource ID. + llvm::DenseMap<unsigned, unsigned> Resource2VecIndex; + + // Table of resources used by instructions. 
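// A small sketch (standard C++; names are invented) of the flat layout used
// by the ResourceUsage vector declared below: a row-major matrix with one row
// per instruction plus one extra row that accumulates per-unit totals,
// indexed as unit + NumUnits * row.
#include <vector>

struct PressureMatrixSketch {
  unsigned NumUnits;
  unsigned NumInsts;
  std::vector<double> Data; // (NumInsts + 1) rows; the last row holds totals

  PressureMatrixSketch(unsigned Units, unsigned Insts)
      : NumUnits(Units), NumInsts(Insts), Data(Units * (Insts + 1), 0.0) {}

  void addCycles(unsigned Unit, unsigned InstIdx, double Cycles) {
    Data[Unit + NumUnits * InstIdx] += Cycles;   // per-instruction row
    Data[Unit + NumUnits * NumInsts] += Cycles;  // running totals row
  }

  double totalFor(unsigned Unit) const {
    return Data[Unit + NumUnits * NumInsts];
  }
};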
+ std::vector<ResourceCycles> ResourceUsage; + unsigned NumResourceUnits; + + void printResourcePressurePerIter(llvm::raw_ostream &OS) const; + void printResourcePressurePerInst(llvm::raw_ostream &OS) const; + +public: + ResourcePressureView(const llvm::MCSubtargetInfo &sti, + llvm::MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S); + + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override { + printResourcePressurePerIter(OS); + printResourcePressurePerInst(OS); + } + StringRef getNameAsString() const override { return "ResourcePressureView"; } + json::Value toJSON() const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp new file mode 100644 index 0000000000..1c40428fb0 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -0,0 +1,91 @@ +//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RetireControlUnitStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/RetireControlUnitStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM) + : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0), + SumOfUsedEntries(0) { + TotalROBEntries = SM.MicroOpBufferSize; + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + TotalROBEntries = EPI.ReorderBufferSize; + } +} + +void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) { + unsigned NumEntries = + static_cast<const HWInstructionDispatchedEvent &>(Event).MicroOpcodes; + EntriesInUse += NumEntries; + } + + if (Event.Type == HWInstructionEvent::Retired) { + unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps; + assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!"); + EntriesInUse -= ReleasedEntries; + ++NumRetired; + } +} + +void RetireControlUnitStatistics::onCycleEnd() { + // Update histogram + RetiredPerCycle[NumRetired]++; + NumRetired = 0; + ++NumCycles; + MaxUsedEntries = std::max(MaxUsedEntries, EntriesInUse); + SumOfUsedEntries += EntriesInUse; +} + +void RetireControlUnitStatistics::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nRetire Control Unit - " + << "number of cycles where we saw N instructions retired:\n"; + TempStream << "[# retired], [# cycles]\n"; + + for (const std::pair<const unsigned, unsigned> &Entry : RetiredPerCycle) { + TempStream << " " << Entry.first; + if (Entry.first < 10) + TempStream << ", "; + else + TempStream << ", "; + TempStream << Entry.second << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << "%)\n"; + } + + unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles; + double MaxUsagePercentage = + 
((double)MaxUsedEntries / TotalROBEntries) * 100.0; + double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; + double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; + double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + + TempStream << "\nTotal ROB Entries: " << TotalROBEntries + << "\nMax Used ROB Entries: " << MaxUsedEntries + << format(" ( %.1f%% )", NormalizedMaxPercentage) + << "\nAverage Used ROB Entries per cy: " << AvgUsage + << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h new file mode 100644 index 0000000000..ed3736c645 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/RetireControlUnitStatistics.h @@ -0,0 +1,64 @@ +//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class RetireControlUnitStatistics: a view that knows how +/// to print general statistics related to the retire control unit. +/// +/// Example: +/// ======== +/// +/// Retire Control Unit - number of cycles where we saw N instructions retired: +/// [# retired], [# cycles] +/// 0, 109 (17.9%) +/// 1, 102 (16.7%) +/// 2, 399 (65.4%) +/// +/// Total ROB Entries: 64 +/// Max Used ROB Entries: 35 ( 54.7% ) +/// Average Used ROB Entries per cy: 32 ( 50.0% ) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H + +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/View.h" +#include <map> + +namespace llvm { +namespace mca { + +class RetireControlUnitStatistics : public View { + using Histogram = std::map<unsigned, unsigned>; + Histogram RetiredPerCycle; + + unsigned NumRetired; + unsigned NumCycles; + unsigned TotalROBEntries; + unsigned EntriesInUse; + unsigned MaxUsedEntries; + unsigned SumOfUsedEntries; + +public: + RetireControlUnitStatistics(const MCSchedModel &SM); + + void onEvent(const HWInstructionEvent &Event) override; + void onCycleEnd() override; + void printView(llvm::raw_ostream &OS) const override; + StringRef getNameAsString() const override { + return "RetireControlUnitStatistics"; + } + bool isSerializable() const override { return false; } +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp new file mode 100644 index 0000000000..06caeda344 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -0,0 +1,178 @@ +//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the SchedulerStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/SchedulerStatistics.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { +namespace mca { + +SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) + : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0), + NumCycles(0), MostRecentLoadDispatched(~0U), + MostRecentStoreDispatched(~0U), + Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) { + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + LQResourceID = EPI.LoadQueueID; + SQResourceID = EPI.StoreQueueID; + } +} + +// FIXME: This implementation works under the assumption that load/store queue +// entries are reserved at 'instruction dispatched' stage, and released at +// 'instruction executed' stage. This currently matches the behavior of LSUnit. +// +// The current design minimizes the number of events generated by the +// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method +// `onEvent`. However, it introduces a subtle dependency between this view and +// how the LSUnit works. +// +// In future we should add a new "memory queue" event type, so that we stop +// making assumptions on how LSUnit internally works (See PR39828). +void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Issued) { + const Instruction &Inst = *Event.IR.getInstruction(); + NumIssued += Inst.getDesc().NumMicroOps; + } else if (Event.Type == HWInstructionEvent::Dispatched) { + const Instruction &Inst = *Event.IR.getInstruction(); + const unsigned Index = Event.IR.getSourceIndex(); + if (LQResourceID && Inst.getMayLoad() && + MostRecentLoadDispatched != Index) { + Usage[LQResourceID].SlotsInUse++; + MostRecentLoadDispatched = Index; + } + if (SQResourceID && Inst.getMayStore() && + MostRecentStoreDispatched != Index) { + Usage[SQResourceID].SlotsInUse++; + MostRecentStoreDispatched = Index; + } + } else if (Event.Type == HWInstructionEvent::Executed) { + const Instruction &Inst = *Event.IR.getInstruction(); + if (LQResourceID && Inst.getMayLoad()) { + assert(Usage[LQResourceID].SlotsInUse); + Usage[LQResourceID].SlotsInUse--; + } + if (SQResourceID && Inst.getMayStore()) { + assert(Usage[SQResourceID].SlotsInUse); + Usage[SQResourceID].SlotsInUse--; + } + } +} + +void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */, + ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + if (Buffer == LQResourceID || Buffer == SQResourceID) + continue; + Usage[Buffer].SlotsInUse++; + } +} + +void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */, + ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + if (Buffer == LQResourceID || Buffer == SQResourceID) + continue; + Usage[Buffer].SlotsInUse--; + } +} + +void SchedulerStatistics::updateHistograms() { + for (BufferUsage &BU : Usage) { + BU.CumulativeNumUsedSlots += BU.SlotsInUse; + BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); + } + + IssueWidthPerCycle[NumIssued]++; + NumIssued = 0; +} + +void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const { + OS << "\n\nSchedulers - " + << "number of 
cycles where we saw N micro opcodes issued:\n"; + OS << "[# issued], [# cycles]\n"; + + bool HasColors = OS.has_colors(); + const auto It = + std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end()); + for (const std::pair<const unsigned, unsigned> &Entry : IssueWidthPerCycle) { + unsigned NumIssued = Entry.first; + if (NumIssued == It->first && HasColors) + OS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + unsigned IPC = Entry.second; + OS << " " << NumIssued << ", " << IPC << " (" + << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n"; + if (HasColors) + OS.resetColor(); + } +} + +void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const { + assert(NumCycles && "Unexpected number of cycles!"); + + OS << "\nScheduler's queue usage:\n"; + if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) { + OS << "No scheduler resources used.\n"; + return; + } + + OS << "[1] Resource name.\n" + << "[2] Average number of used buffer entries.\n" + << "[3] Maximum number of used buffer entries.\n" + << "[4] Total number of buffer entries.\n\n" + << " [1] [2] [3] [4]\n"; + + formatted_raw_ostream FOS(OS); + bool HasColors = FOS.has_colors(); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + if (ProcResource.BufferSize <= 0) + continue; + + const BufferUsage &BU = Usage[I]; + double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles; + double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5; + unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10; + unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10; + + FOS << ProcResource.Name; + FOS.PadToColumn(17); + if (HasColors && NormalizedAvg >= NormalizedThreshold) + FOS.changeColor(raw_ostream::YELLOW, true, false); + FOS << NormalizedAvg; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(28); + if (HasColors && + BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize)) + FOS.changeColor(raw_ostream::RED, true, false); + FOS << BU.MaxUsedSlots; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(39); + FOS << ProcResource.BufferSize << '\n'; + } + + FOS.flush(); +} + +void SchedulerStatistics::printView(raw_ostream &OS) const { + printSchedulerStats(OS); + printSchedulerUsage(OS); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h new file mode 100644 index 0000000000..9d2f71c13e --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SchedulerStatistics.h @@ -0,0 +1,97 @@ +//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a +/// View that listens to instruction issue events in order to print general +/// statistics related to the hardware schedulers. 
+/// +/// Example: +/// ======== +/// +/// Schedulers - number of cycles where we saw N instructions issued: +/// [# issued], [# cycles] +/// 0, 6 (2.9%) +/// 1, 106 (50.7%) +/// 2, 97 (46.4%) +/// +/// Scheduler's queue usage: +/// [1] Resource name. +/// [2] Average number of used buffer entries. +/// [3] Maximum number of used buffer entries. +/// [4] Total number of buffer entries. +/// +/// [1] [2] [3] [4] +/// JALU01 0 0 20 +/// JFPU01 15 18 18 +/// JLSAGU 0 0 12 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/View.h" +#include <map> + +namespace llvm { +namespace mca { + +class SchedulerStatistics final : public View { + const llvm::MCSchedModel &SM; + unsigned LQResourceID; + unsigned SQResourceID; + + unsigned NumIssued; + unsigned NumCycles; + + unsigned MostRecentLoadDispatched; + unsigned MostRecentStoreDispatched; + + // Tracks the usage of a scheduler's queue. + struct BufferUsage { + unsigned SlotsInUse; + unsigned MaxUsedSlots; + uint64_t CumulativeNumUsedSlots; + }; + + using Histogram = std::map<unsigned, unsigned>; + Histogram IssueWidthPerCycle; + + std::vector<BufferUsage> Usage; + + void updateHistograms(); + void printSchedulerStats(llvm::raw_ostream &OS) const; + void printSchedulerUsage(llvm::raw_ostream &OS) const; + +public: + SchedulerStatistics(const llvm::MCSubtargetInfo &STI); + void onEvent(const HWInstructionEvent &Event) override; + void onCycleBegin() override { NumCycles++; } + void onCycleEnd() override { updateHistograms(); } + + // Increases the number of used scheduler queue slots of every buffered + // resource in the Buffers set. + void onReservedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + // Decreases by one the number of used scheduler queue slots of every + // buffered resource in the Buffers set. + void onReleasedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + void printView(llvm::raw_ostream &OS) const override; + StringRef getNameAsString() const override { return "SchedulerStatistics"; } + bool isSerializable() const override { return false; } +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp new file mode 100644 index 0000000000..bf258b4c26 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.cpp @@ -0,0 +1,113 @@ +//===--------------------- SummaryView.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the SummaryView to print +/// the report information. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/SummaryView.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S, + unsigned Width) + : SM(Model), Source(S), DispatchWidth(Width ? Width : Model.IssueWidth), + LastInstructionIdx(0), TotalCycles(0), NumMicroOps(0), + ProcResourceUsage(Model.getNumProcResourceKinds(), 0), + ProcResourceMasks(Model.getNumProcResourceKinds()), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResourceMasks); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); + ResIdx2ProcResID[Index] = I; + } +} + +void SummaryView::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) + LastInstructionIdx = Event.IR.getSourceIndex(); + + // We are only interested in the "instruction retired" events generated by + // the retire stage for instructions that are part of iteration #0. + if (Event.Type != HWInstructionEvent::Retired || + Event.IR.getSourceIndex() >= Source.size()) + return; + + // Update the cumulative number of resource cycles based on the processor + // resource usage information available from the instruction descriptor. We + // need to compute the cumulative number of resource cycles for every + // processor resource which is consumed by an instruction of the block. + const Instruction &Inst = *Event.IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); + NumMicroOps += Desc.NumMicroOps; + for (const std::pair<uint64_t, ResourceUsage> &RU : Desc.Resources) { + if (RU.second.size()) { + unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)]; + ProcResourceUsage[ProcResID] += RU.second.size(); + } + } +} + +void SummaryView::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + DisplayValues DV; + + collectData(DV); + TempStream << "Iterations: " << DV.Iterations; + TempStream << "\nInstructions: " << DV.TotalInstructions; + TempStream << "\nTotal Cycles: " << DV.TotalCycles; + TempStream << "\nTotal uOps: " << DV.TotalUOps << '\n'; + TempStream << "\nDispatch Width: " << DV.DispatchWidth; + TempStream << "\nuOps Per Cycle: " + << format("%.2f", floor((DV.UOpsPerCycle * 100) + 0.5) / 100); + TempStream << "\nIPC: " + << format("%.2f", floor((DV.IPC * 100) + 0.5) / 100); + TempStream << "\nBlock RThroughput: " + << format("%.1f", floor((DV.BlockRThroughput * 10) + 0.5) / 10) + << '\n'; + TempStream.flush(); + OS << Buffer; +} + +void SummaryView::collectData(DisplayValues &DV) const { + DV.Instructions = Source.size(); + DV.Iterations = (LastInstructionIdx / DV.Instructions) + 1; + DV.TotalInstructions = DV.Instructions * DV.Iterations; + DV.TotalCycles = TotalCycles; + DV.DispatchWidth = DispatchWidth; + DV.TotalUOps = NumMicroOps * DV.Iterations; + DV.UOpsPerCycle = (double)DV.TotalUOps / TotalCycles; + DV.IPC = (double)DV.TotalInstructions / TotalCycles; + DV.BlockRThroughput = computeBlockRThroughput(SM, DispatchWidth, NumMicroOps, + ProcResourceUsage); +} + +json::Value SummaryView::toJSON() const { + DisplayValues DV; + collectData(DV); + json::Object JO({{"Iterations", DV.Iterations}, + {"Instructions", DV.TotalInstructions}, + {"TotalCycles", DV.TotalCycles}, + {"TotaluOps", 
DV.TotalUOps}, + {"DispatchWidth", DV.DispatchWidth}, + {"uOpsPerCycle", DV.UOpsPerCycle}, + {"IPC", DV.IPC}, + {"BlockRThroughput", DV.BlockRThroughput}}); + return JO; +} +} // namespace mca. +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h new file mode 100644 index 0000000000..21f3fad23c --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/SummaryView.h @@ -0,0 +1,90 @@ +//===--------------------- SummaryView.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the summary view. +/// +/// The goal of the summary view is to give a very quick overview of the +/// performance throughput. Below is an example of summary view: +/// +/// +/// Iterations: 300 +/// Instructions: 900 +/// Total Cycles: 610 +/// Dispatch Width: 2 +/// IPC: 1.48 +/// Block RThroughput: 2.0 +/// +/// The summary view collects a few performance numbers. The two main +/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H +#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MCA/View.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +/// A view that collects and prints a few performance numbers. +class SummaryView : public View { + const llvm::MCSchedModel &SM; + llvm::ArrayRef<llvm::MCInst> Source; + const unsigned DispatchWidth; + unsigned LastInstructionIdx; + unsigned TotalCycles; + // The total number of micro opcodes contributed by a block of instructions. + unsigned NumMicroOps; + + struct DisplayValues { + unsigned Instructions; + unsigned Iterations; + unsigned TotalInstructions; + unsigned TotalCycles; + unsigned DispatchWidth; + unsigned TotalUOps; + double IPC; + double UOpsPerCycle; + double BlockRThroughput; + }; + + // For each processor resource, this vector stores the cumulative number of + // resource cycles consumed by the analyzed code block. + llvm::SmallVector<unsigned, 8> ProcResourceUsage; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + llvm::SmallVector<uint64_t, 8> ProcResourceMasks; + + // Used to map resource indices to actual processor resource IDs. + llvm::SmallVector<unsigned, 8> ResIdx2ProcResID; + + /// Compute the data we want to print out in the object DV. 
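+ /// Values are derived from the counters updated by onEvent() and onCycleEnd(), and are shared by both printView() and toJSON().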
+ void collectData(DisplayValues &DV) const; + +public: + SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S, + unsigned Width); + + void onCycleEnd() override { ++TotalCycles; } + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; + StringRef getNameAsString() const override { return "SummaryView"; } + json::Value toJSON() const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp new file mode 100644 index 0000000000..5c05edbdea --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.cpp @@ -0,0 +1,328 @@ +//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements the TimelineView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/TimelineView.h" +#include <numeric> + +namespace llvm { +namespace mca { + +TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, + unsigned Cycles) + : InstructionView(sti, Printer, S), CurrentCycle(0), + MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles), + LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) { + unsigned NumInstructions = getSource().size(); + assert(Iterations && "Invalid number of iterations specified!"); + NumInstructions *= Iterations; + Timeline.resize(NumInstructions); + TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0}; + std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); + + WaitTimeEntry NullWTEntry = {0, 0, 0}; + std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); + + std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, + /* unknown buffer size */ -1}; + std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry); +} + +void TimelineView::onReservedBuffers(const InstRef &IR, + ArrayRef<unsigned> Buffers) { + if (IR.getSourceIndex() >= getSource().size()) + return; + + const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); + std::pair<unsigned, int> BufferInfo = {0, -1}; + for (const unsigned Buffer : Buffers) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer); + if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { + BufferInfo.first = Buffer; + BufferInfo.second = MCDesc.BufferSize; + } + } + + UsedBuffer[IR.getSourceIndex()] = BufferInfo; +} + +void TimelineView::onEvent(const HWInstructionEvent &Event) { + const unsigned Index = Event.IR.getSourceIndex(); + if (Index >= Timeline.size()) + return; + + switch (Event.Type) { + case HWInstructionEvent::Retired: { + TimelineViewEntry &TVEntry = Timeline[Index]; + if (CurrentCycle < MaxCycle) + TVEntry.CycleRetired = CurrentCycle; + + // Update the WaitTime entry which corresponds to this Index. 
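+ // The per-instruction totals accumulated below are divided by the number of executions in printWaitTimeEntry() to produce the averages reported in the "Average Wait times" table.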
+ assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!"); + unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); + WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()]; + WTEntry.CyclesSpentInSchedulerQueue += + TVEntry.CycleIssued - CycleDispatched; + assert(CycleDispatched <= TVEntry.CycleReady && + "Instruction cannot be ready if it hasn't been dispatched yet!"); + WTEntry.CyclesSpentInSQWhileReady += + TVEntry.CycleIssued - TVEntry.CycleReady; + if (CurrentCycle > TVEntry.CycleExecuted) { + WTEntry.CyclesSpentAfterWBAndBeforeRetire += + (CurrentCycle - 1) - TVEntry.CycleExecuted; + } + break; + } + case HWInstructionEvent::Ready: + Timeline[Index].CycleReady = CurrentCycle; + break; + case HWInstructionEvent::Issued: + Timeline[Index].CycleIssued = CurrentCycle; + break; + case HWInstructionEvent::Executed: + Timeline[Index].CycleExecuted = CurrentCycle; + break; + case HWInstructionEvent::Dispatched: + // There may be multiple dispatch events. Microcoded instructions that are + // expanded into multiple uOps may require multiple dispatch cycles. Here, + // we want to capture the first dispatch cycle. + if (Timeline[Index].CycleDispatched == -1) + Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); + break; + default: + return; + } + if (CurrentCycle < MaxCycle) + LastCycle = std::max(LastCycle, CurrentCycle); +} + +static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, + unsigned Executions, int BufferSize) { + if (CumulativeCycles && BufferSize < 0) + return raw_ostream::MAGENTA; + unsigned Size = static_cast<unsigned>(BufferSize); + if (CumulativeCycles >= Size * Executions) + return raw_ostream::RED; + if ((CumulativeCycles * 2) >= Size * Executions) + return raw_ostream::YELLOW; + return raw_ostream::SAVEDCOLOR; +} + +static void tryChangeColor(raw_ostream &OS, unsigned Cycles, + unsigned Executions, int BufferSize) { + if (!OS.has_colors()) + return; + + raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize); + if (Color == raw_ostream::SAVEDCOLOR) { + OS.resetColor(); + return; + } + OS.changeColor(Color, /* bold */ true, /* BG */ false); +} + +void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, + const WaitTimeEntry &Entry, + unsigned SourceIndex, + unsigned Executions) const { + bool PrintingTotals = SourceIndex == getSource().size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + + OS.PadToColumn(7); + + double AverageTime1, AverageTime2, AverageTime3; + AverageTime1 = + (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; + AverageTime2 = + (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; + AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / + CumulativeExecutions; + + OS << Executions; + OS.PadToColumn(13); + + int BufferSize = PrintingTotals ? 
0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor(AverageTime1 + 0.5) / 10); + OS.PadToColumn(20); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor(AverageTime2 + 0.5) / 10); + OS.PadToColumn(27); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, + getSubTargetInfo().getSchedModel().MicroOpBufferSize); + OS << format("%.1f", floor(AverageTime3 + 0.5) / 10); + + if (OS.has_colors()) + OS.resetColor(); + OS.PadToColumn(34); +} + +void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { + std::string Header = + "\n\nAverage Wait times (based on the timeline view):\n" + "[0]: Executions\n" + "[1]: Average time spent waiting in a scheduler's queue\n" + "[2]: Average time spent waiting in a scheduler's queue while ready\n" + "[3]: Average time elapsed from WB until retire stage\n\n" + " [0] [1] [2] [3]\n"; + OS << Header; + formatted_raw_ostream FOS(OS); + unsigned Executions = Timeline.size() / getSource().size(); + unsigned IID = 0; + for (const MCInst &Inst : getSource()) { + printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions); + FOS << " " << printInstructionString(Inst) << '\n'; + FOS.flush(); + ++IID; + } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (getSource().size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "<total>" << '\n'; + FOS.flush(); + } +} + +void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, + const TimelineViewEntry &Entry, + unsigned Iteration, + unsigned SourceIndex) const { + if (Iteration == 0 && SourceIndex == 0) + OS << '\n'; + OS << '[' << Iteration << ',' << SourceIndex << ']'; + OS.PadToColumn(10); + assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!"); + unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); + for (unsigned I = 0, E = CycleDispatched; I < E; ++I) + OS << ((I % 5 == 0) ? '.' : ' '); + OS << TimelineView::DisplayChar::Dispatched; + if (CycleDispatched != Entry.CycleExecuted) { + // Zero latency instructions have the same value for CycleDispatched, + // CycleIssued and CycleExecuted. 
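+ // For zero latency instructions this whole block is skipped, so the timeline shows only 'D', any retire lag ('-'), and 'R'.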
+ for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) + OS << TimelineView::DisplayChar::Waiting; + if (Entry.CycleIssued == Entry.CycleExecuted) + OS << TimelineView::DisplayChar::DisplayChar::Executed; + else { + if (CycleDispatched != Entry.CycleIssued) + OS << TimelineView::DisplayChar::Executing; + for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; + ++I) + OS << TimelineView::DisplayChar::Executing; + OS << TimelineView::DisplayChar::Executed; + } + } + + for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) + OS << TimelineView::DisplayChar::RetireLag; + if (Entry.CycleExecuted < Entry.CycleRetired) + OS << TimelineView::DisplayChar::Retired; + + // Skip other columns. + for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) + OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); +} + +static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { + OS << "\n\nTimeline view:\n"; + if (Cycles >= 10) { + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << ' '; + else + OS << I % 10; + } + OS << '\n'; + } + + OS << "Index"; + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << I % 10; + else + OS << ' '; + } + OS << '\n'; +} + +void TimelineView::printTimeline(raw_ostream &OS) const { + formatted_raw_ostream FOS(OS); + printTimelineHeader(FOS, LastCycle); + FOS.flush(); + + unsigned IID = 0; + ArrayRef<llvm::MCInst> Source = getSource(); + const unsigned Iterations = Timeline.size() / Source.size(); + for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { + for (const MCInst &Inst : Source) { + const TimelineViewEntry &Entry = Timeline[IID]; + // When an instruction is retired after timeline-max-cycles, + // its CycleRetired is left at 0. However, it's possible for + // a 0 latency instruction to be retired during cycle 0 and we + // don't want to early exit in that case. The CycleExecuted + // attribute is set correctly whether or not it is greater + // than timeline-max-cycles so we can use that to ensure + // we don't early exit because of a 0 latency instruction. + if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) { + FOS << "Truncated display due to cycle limit\n"; + return; + } + + unsigned SourceIndex = IID % Source.size(); + printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex); + FOS << " " << printInstructionString(Inst) << '\n'; + FOS.flush(); + + ++IID; + } + } +} + +json::Value TimelineView::toJSON() const { + json::Array TimelineInfo; + + for (const TimelineViewEntry &TLE : Timeline) { + TimelineInfo.push_back( + json::Object({{"CycleDispatched", TLE.CycleDispatched}, + {"CycleReady", TLE.CycleReady}, + {"CycleIssued", TLE.CycleIssued}, + {"CycleExecuted", TLE.CycleExecuted}, + {"CycleRetired", TLE.CycleRetired}})); + } + return json::Object({{"TimelineInfo", std::move(TimelineInfo)}}); +} +} // namespace mca +} // namespace llvm diff --git a/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h new file mode 100644 index 0000000000..81be8244b7 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/Views/TimelineView.h @@ -0,0 +1,188 @@ +//===--------------------- TimelineView.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements a timeline view for the llvm-mca tool. +/// +/// Class TimelineView observes events generated by the pipeline. For every +/// instruction executed by the pipeline, it stores information related to +/// state transition. It then plots that information in the form of a table +/// as reported by the example below: +/// +/// Timeline view: +/// 0123456 +/// Index 0123456789 +/// +/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1 +/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2 +/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5 +/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3 +/// [0,4] . D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4 +/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6 +/// +/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1 +/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2 +/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5 +/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3 +/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4 +/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6 +/// +/// There is an entry for every instruction in the input assembly sequence. +/// The first field is a pair of numbers obtained from the instruction index. +/// The first element of the pair is the iteration index, while the second +/// element of the pair is a sequence number (i.e. a position in the assembly +/// sequence). +/// The second field of the table is the actual timeline information; each +/// column is the information related to a specific cycle of execution. +/// The timeline of an instruction is described by a sequence of character +/// where each character represents the instruction state at a specific cycle. +/// +/// Possible instruction states are: +/// D: Instruction Dispatched +/// e: Instruction Executing +/// E: Instruction Executed (write-back stage) +/// R: Instruction retired +/// =: Instruction waiting in the Scheduler's queue +/// -: Instruction executed, waiting to retire in order. +/// +/// dots ('.') and empty spaces are cycles where the instruction is not +/// in-flight. +/// +/// The last column is the assembly instruction associated to the entry. +/// +/// Based on the timeline view information from the example, instruction 0 +/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3. +/// Instruction [0,1] was also dispatched at cycle 0, and it retired at +/// the same cycle than instruction [0,0]. +/// Instruction [0,4] has been dispatched at cycle 2. However, it had to +/// wait for two cycles before being issued. That is because operands +/// became ready only at cycle 5. +/// +/// This view helps further understanding bottlenecks and the impact of +/// resource pressure on the code. +/// +/// To better understand why instructions had to wait for multiple cycles in +/// the scheduler's queue, class TimelineView also reports extra timing info +/// in another table named "Average Wait times" (see example below). +/// +/// +/// Average Wait times (based on the timeline view): +/// [0]: Executions +/// [1]: Average time spent waiting in a scheduler's queue +/// [2]: Average time spent waiting in a scheduler's queue while ready +/// [3]: Average time elapsed from WB until retire stage +/// +/// [0] [1] [2] [3] +/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1 +/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2 +/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5 +/// 3. 
2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 +/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 +/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 <total> +/// +/// By comparing column [2] with column [1], we get an idea about how many +/// cycles were spent in the scheduler's queue due to data dependencies. +/// +/// In this example, instruction 5 spent an average of ~6 cycles in the +/// scheduler's queue. As soon as operands became ready, the instruction +/// was immediately issued to the pipeline(s). +/// That is expected because instruction 5 cannot transition to the "ready" +/// state until %xmm4 is written by instruction 4. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H +#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H + +#include "Views/InstructionView.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +/// This class listens to instruction state transition events +/// in order to construct a timeline information. +/// +/// For every instruction executed by the Pipeline, this class constructs +/// a TimelineViewEntry object. TimelineViewEntry objects are then used +/// to print the timeline information, as well as the "average wait times" +/// for every instruction in the input assembly sequence. +class TimelineView : public InstructionView { + unsigned CurrentCycle; + unsigned MaxCycle; + unsigned LastCycle; + + struct TimelineViewEntry { + int CycleDispatched; // A negative value is an "invalid cycle". + unsigned CycleReady; + unsigned CycleIssued; + unsigned CycleExecuted; + unsigned CycleRetired; + }; + std::vector<TimelineViewEntry> Timeline; + + struct WaitTimeEntry { + unsigned CyclesSpentInSchedulerQueue; + unsigned CyclesSpentInSQWhileReady; + unsigned CyclesSpentAfterWBAndBeforeRetire; + }; + std::vector<WaitTimeEntry> WaitTime; + + // This field is used to map instructions to buffered resources. + // Elements of this vector are <resourceID, BufferSizer> pairs. + std::vector<std::pair<unsigned, int>> UsedBuffer; + + void printTimelineViewEntry(llvm::formatted_raw_ostream &OS, + const TimelineViewEntry &E, unsigned Iteration, + unsigned SourceIndex) const; + void printWaitTimeEntry(llvm::formatted_raw_ostream &OS, + const WaitTimeEntry &E, unsigned Index, + unsigned Executions) const; + + // Display characters for the TimelineView report output. + struct DisplayChar { + static const char Dispatched = 'D'; + static const char Executed = 'E'; + static const char Retired = 'R'; + static const char Waiting = '='; // Instruction is waiting in the scheduler. + static const char Executing = 'e'; + static const char RetireLag = '-'; // The instruction is waiting to retire. + }; + +public: + TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, + unsigned Cycles); + + // Event handlers. + void onCycleEnd() override { ++CurrentCycle; } + void onEvent(const HWInstructionEvent &Event) override; + void onReservedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + // print functionalities. 
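+ // printTimeline() emits the per-cycle table and printAverageWaitTimes() the summary that follows it; both are invoked from printView().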
+ void printTimeline(llvm::raw_ostream &OS) const; + void printAverageWaitTimes(llvm::raw_ostream &OS) const; + void printView(llvm::raw_ostream &OS) const override { + printTimeline(OS); + printAverageWaitTimes(OS); + } + StringRef getNameAsString() const override { return "TimelineView"; } + json::Value toJSON() const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp b/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp new file mode 100644 index 0000000000..73c341891a --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/llvm-mca.cpp @@ -0,0 +1,761 @@ +//===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This utility is a simple driver that allows static performance analysis on +// machine code similarly to how IACA (Intel Architecture Code Analyzer) works. +// +// llvm-mca [options] <file-name> +// -march <type> +// -mcpu <cpu> +// -o <file> +// +// The target defaults to the host target. +// The cpu defaults to the 'native' host cpu. +// The output defaults to standard output. +// +//===----------------------------------------------------------------------===// + +#include "CodeRegion.h" +#include "CodeRegionGenerator.h" +#include "PipelinePrinter.h" +#include "Views/BottleneckAnalysis.h" +#include "Views/DispatchStatistics.h" +#include "Views/InstructionInfoView.h" +#include "Views/RegisterFileStatistics.h" +#include "Views/ResourcePressureView.h" +#include "Views/RetireControlUnitStatistics.h" +#include "Views/SchedulerStatistics.h" +#include "Views/SummaryView.h" +#include "Views/TimelineView.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/MCA/CodeEmitter.h" +#include "llvm/MCA/Context.h" +#include "llvm/MCA/CustomBehaviour.h" +#include "llvm/MCA/InstrBuilder.h" +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Stages/InstructionTables.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" + +using namespace llvm; + +static mc::RegisterMCTargetOptionsFlags MOF; + +static cl::OptionCategory ToolOptions("Tool Options"); +static cl::OptionCategory ViewOptions("View Options"); + +static cl::opt<std::string> InputFilename(cl::Positional, + cl::desc("<input file>"), + cl::cat(ToolOptions), cl::init("-")); + +static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), + cl::init("-"), cl::cat(ToolOptions), + cl::value_desc("filename")); + +static cl::opt<std::string> + ArchName("march", + cl::desc("Target architecture. 
" + "See -version for available targets"), + cl::cat(ToolOptions)); + +static cl::opt<std::string> + TripleName("mtriple", + cl::desc("Target triple. See -version for available targets"), + cl::cat(ToolOptions)); + +static cl::opt<std::string> + MCPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native")); + +static cl::list<std::string> + MATTRS("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,..."), cl::cat(ToolOptions)); + +static cl::opt<bool> PrintJson("json", + cl::desc("Print the output in json format"), + cl::cat(ToolOptions), cl::init(false)); + +static cl::opt<int> + OutputAsmVariant("output-asm-variant", + cl::desc("Syntax variant to use for output printing"), + cl::cat(ToolOptions), cl::init(-1)); + +static cl::opt<bool> + PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false), + cl::desc("Prefer hex format when printing immediate values")); + +static cl::opt<unsigned> Iterations("iterations", + cl::desc("Number of iterations to run"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> + DispatchWidth("dispatch", cl::desc("Override the processor dispatch width"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> + RegisterFileSize("register-file-size", + cl::desc("Maximum number of physical registers which can " + "be used for register mappings"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> + MicroOpQueue("micro-op-queue-size", cl::Hidden, + cl::desc("Number of entries in the micro-op queue"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> + DecoderThroughput("decoder-throughput", cl::Hidden, + cl::desc("Maximum throughput from the decoders " + "(instructions per cycle)"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<bool> + PrintRegisterFileStats("register-file-stats", + cl::desc("Print register file statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> PrintDispatchStats("dispatch-stats", + cl::desc("Print dispatch statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> + PrintSummaryView("summary-view", cl::Hidden, + cl::desc("Print summary view (enabled by default)"), + cl::cat(ViewOptions), cl::init(true)); + +static cl::opt<bool> PrintSchedulerStats("scheduler-stats", + cl::desc("Print scheduler statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> + PrintRetireStats("retire-stats", + cl::desc("Print retire control unit statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> PrintResourcePressureView( + "resource-pressure", + cl::desc("Print the resource pressure view (enabled by default)"), + cl::cat(ViewOptions), cl::init(true)); + +static cl::opt<bool> PrintTimelineView("timeline", + cl::desc("Print the timeline view"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<unsigned> TimelineMaxIterations( + "timeline-max-iterations", + cl::desc("Maximum number of iterations to print in timeline view"), + cl::cat(ViewOptions), cl::init(0)); + +static cl::opt<unsigned> + TimelineMaxCycles("timeline-max-cycles", + cl::desc("Maximum number of cycles in the timeline view, " + "or 0 for unlimited. 
Defaults to 80 cycles"), + cl::cat(ViewOptions), cl::init(80)); + +static cl::opt<bool> + AssumeNoAlias("noalias", + cl::desc("If set, assume that loads and stores do not alias"), + cl::cat(ToolOptions), cl::init(true)); + +static cl::opt<unsigned> LoadQueueSize("lqueue", + cl::desc("Size of the load queue"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> StoreQueueSize("squeue", + cl::desc("Size of the store queue"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<bool> + PrintInstructionTables("instruction-tables", + cl::desc("Print instruction tables"), + cl::cat(ToolOptions), cl::init(false)); + +static cl::opt<bool> PrintInstructionInfoView( + "instruction-info", + cl::desc("Print the instruction info view (enabled by default)"), + cl::cat(ViewOptions), cl::init(true)); + +static cl::opt<bool> EnableAllStats("all-stats", + cl::desc("Print all hardware statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> + EnableAllViews("all-views", + cl::desc("Print all views including hardware statistics"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> EnableBottleneckAnalysis( + "bottleneck-analysis", + cl::desc("Enable bottleneck analysis (disabled by default)"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> ShowEncoding( + "show-encoding", + cl::desc("Print encoding information in the instruction info view"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> ShowBarriers( + "show-barriers", + cl::desc("Print memory barrier information in the instruction info view"), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> DisableCustomBehaviour( + "disable-cb", + cl::desc( + "Disable custom behaviour (use the default class which does nothing)."), + cl::cat(ViewOptions), cl::init(false)); + +static cl::opt<bool> DisableInstrumentManager( + "disable-im", + cl::desc("Disable instrumentation manager (use the default class which " + "ignores instruments.)."), + cl::cat(ViewOptions), cl::init(false)); + +namespace { + +const Target *getTarget(const char *ProgName) { + if (TripleName.empty()) + TripleName = Triple::normalize(sys::getDefaultTargetTriple()); + Triple TheTriple(TripleName); + + // Get the target specific parser. + std::string Error; + const Target *TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, Error); + if (!TheTarget) { + errs() << ProgName << ": " << Error; + return nullptr; + } + + // Update TripleName with the updated triple from the target lookup. + TripleName = TheTriple.str(); + + // Return the found target. 
+ return TheTarget; +} + +ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() { + if (OutputFilename == "") + OutputFilename = "-"; + std::error_code EC; + auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC, + sys::fs::OF_TextWithCRLF); + if (!EC) + return std::move(Out); + return EC; +} +} // end of anonymous namespace + +static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) { + if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition()) + O = Default.getValue(); +} + +static void processViewOptions(bool IsOutOfOrder) { + if (!EnableAllViews.getNumOccurrences() && + !EnableAllStats.getNumOccurrences()) + return; + + if (EnableAllViews.getNumOccurrences()) { + processOptionImpl(PrintSummaryView, EnableAllViews); + if (IsOutOfOrder) + processOptionImpl(EnableBottleneckAnalysis, EnableAllViews); + processOptionImpl(PrintResourcePressureView, EnableAllViews); + processOptionImpl(PrintTimelineView, EnableAllViews); + processOptionImpl(PrintInstructionInfoView, EnableAllViews); + } + + const cl::opt<bool> &Default = + EnableAllViews.getPosition() < EnableAllStats.getPosition() + ? EnableAllStats + : EnableAllViews; + processOptionImpl(PrintRegisterFileStats, Default); + processOptionImpl(PrintDispatchStats, Default); + processOptionImpl(PrintSchedulerStats, Default); + if (IsOutOfOrder) + processOptionImpl(PrintRetireStats, Default); +} + +// Returns true on success. +static bool runPipeline(mca::Pipeline &P) { + // Handle pipeline errors here. + Expected<unsigned> Cycles = P.run(); + if (!Cycles) { + WithColor::error() << toString(Cycles.takeError()); + return false; + } + return true; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + // Initialize targets and assembly parsers. + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllTargetMCAs(); + + // Register the Target and CPU printer for --version. + cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU); + + // Enable printing of available targets when flag --version is specified. + cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); + + cl::HideUnrelatedOptions({&ToolOptions, &ViewOptions}); + + // Parse flags and initialize target options. + cl::ParseCommandLineOptions(argc, argv, + "llvm machine code performance analyzer.\n"); + + // Get the target from the triple. If a triple is not specified, then select + // the default triple for the host. If the triple doesn't correspond to any + // registered target, then exit with an error message. + const char *ProgName = argv[0]; + const Target *TheTarget = getTarget(ProgName); + if (!TheTarget) + return 1; + + // GetTarget() may replaced TripleName with a default triple. + // For safety, reconstruct the Triple object. 
+ Triple TheTriple(TripleName); + + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (std::error_code EC = BufferPtr.getError()) { + WithColor::error() << InputFilename << ": " << EC.message() << '\n'; + return 1; + } + + if (MCPU == "native") + MCPU = std::string(llvm::sys::getHostCPUName()); + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + if (MATTRS.size()) { + SubtargetFeatures Features; + for (std::string &MAttr : MATTRS) + Features.AddFeature(MAttr); + FeaturesStr = Features.getString(); + } + + std::unique_ptr<MCSubtargetInfo> STI( + TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); + assert(STI && "Unable to create subtarget info!"); + if (!STI->isCPUStringValid(MCPU)) + return 1; + + if (!STI->getSchedModel().hasInstrSchedModel()) { + WithColor::error() + << "unable to find instruction-level scheduling information for" + << " target triple '" << TheTriple.normalize() << "' and cpu '" << MCPU + << "'.\n"; + + if (STI->getSchedModel().InstrItineraries) + WithColor::note() + << "cpu '" << MCPU << "' provides itineraries. However, " + << "instruction itineraries are currently unsupported.\n"; + return 1; + } + + // Apply overrides to llvm-mca specific options. + bool IsOutOfOrder = STI->getSchedModel().isOutOfOrder(); + processViewOptions(IsOutOfOrder); + + std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); + assert(MRI && "Unable to create target register info!"); + + MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); + std::unique_ptr<MCAsmInfo> MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); + assert(MAI && "Unable to create target asm info!"); + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); + + MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr); + std::unique_ptr<MCObjectFileInfo> MOFI( + TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); + Ctx.setObjectFileInfo(MOFI.get()); + + std::unique_ptr<buffer_ostream> BOS; + + std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); + assert(MCII && "Unable to create instruction info!"); + + std::unique_ptr<MCInstrAnalysis> MCIA( + TheTarget->createMCInstrAnalysis(MCII.get())); + + // Need to initialize an MCInstPrinter as it is + // required for initializing the MCTargetStreamer + // which needs to happen within the CRG.parseAnalysisRegions() call below. + // Without an MCTargetStreamer, certain assembly directives can trigger a + // segfault. (For example, the .cv_fpo_proc directive on x86 will segfault if + // we don't initialize the MCTargetStreamer.) + unsigned IPtempOutputAsmVariant = + OutputAsmVariant == -1 ? 0 : OutputAsmVariant; + std::unique_ptr<MCInstPrinter> IPtemp(TheTarget->createMCInstPrinter( + Triple(TripleName), IPtempOutputAsmVariant, *MAI, *MCII, *MRI)); + if (!IPtemp) { + WithColor::error() + << "unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with assembly variant " + << IPtempOutputAsmVariant << ".\n"; + return 1; + } + + // Parse the input and create CodeRegions that llvm-mca can analyze. 
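+ // Analysis regions are delimited by LLVM-MCA-BEGIN / LLVM-MCA-END comments in the assembly input; without markers, the entire input forms a single default region.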
+ mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, + *MCII); + Expected<const mca::AnalysisRegions &> RegionsOrErr = + CRG.parseAnalysisRegions(std::move(IPtemp)); + if (!RegionsOrErr) { + if (auto Err = + handleErrors(RegionsOrErr.takeError(), [](const StringError &E) { + WithColor::error() << E.getMessage() << '\n'; + })) { + // Default case. + WithColor::error() << toString(std::move(Err)) << '\n'; + } + return 1; + } + const mca::AnalysisRegions &Regions = *RegionsOrErr; + + // Early exit if errors were found by the code region parsing logic. + if (!Regions.isValid()) + return 1; + + if (Regions.empty()) { + WithColor::error() << "no assembly instructions found.\n"; + return 1; + } + + std::unique_ptr<mca::InstrumentManager> IM; + if (!DisableInstrumentManager) { + IM = std::unique_ptr<mca::InstrumentManager>( + TheTarget->createInstrumentManager(*STI, *MCII)); + } + if (!IM) { + // If the target doesn't have its own IM implemented (or the -disable-cb + // flag is set) then we use the base class (which does nothing). + IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); + } + + // Parse the input and create InstrumentRegion that llvm-mca + // can use to improve analysis. + mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, + *MCII, *IM); + Expected<const mca::InstrumentRegions &> InstrumentRegionsOrErr = + IRG.parseInstrumentRegions(std::move(IPtemp)); + if (!InstrumentRegionsOrErr) { + if (auto Err = handleErrors(InstrumentRegionsOrErr.takeError(), + [](const StringError &E) { + WithColor::error() << E.getMessage() << '\n'; + })) { + // Default case. + WithColor::error() << toString(std::move(Err)) << '\n'; + } + return 1; + } + const mca::InstrumentRegions &InstrumentRegions = *InstrumentRegionsOrErr; + + // Early exit if errors were found by the instrumentation parsing logic. + if (!InstrumentRegions.isValid()) + return 1; + + // Now initialize the output file. + auto OF = getOutputStream(); + if (std::error_code EC = OF.getError()) { + WithColor::error() << EC.message() << '\n'; + return 1; + } + + unsigned AssemblerDialect = CRG.getAssemblerDialect(); + if (OutputAsmVariant >= 0) + AssemblerDialect = static_cast<unsigned>(OutputAsmVariant); + std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( + Triple(TripleName), AssemblerDialect, *MAI, *MCII, *MRI)); + if (!IP) { + WithColor::error() + << "unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with assembly variant " + << AssemblerDialect << ".\n"; + return 1; + } + + // Set the display preference for hex vs. decimal immediates. + IP->setPrintImmHex(PrintImmHex); + + std::unique_ptr<ToolOutputFile> TOF = std::move(*OF); + + const MCSchedModel &SM = STI->getSchedModel(); + + std::unique_ptr<mca::InstrPostProcess> IPP; + if (!DisableCustomBehaviour) { + // TODO: It may be a good idea to separate CB and IPP so that they can + // be used independently of each other. What I mean by this is to add + // an extra command-line arg --disable-ipp so that CB and IPP can be + // toggled without needing to toggle both of them together. + IPP = std::unique_ptr<mca::InstrPostProcess>( + TheTarget->createInstrPostProcess(*STI, *MCII)); + } + if (!IPP) { + // If the target doesn't have its own IPP implemented (or the -disable-cb + // flag is set) then we use the base class (which does nothing). + IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII); + } + + // Create an instruction builder. 
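+ // The InstrBuilder lowers each MCInst into an mca::Instruction using the target's scheduling model and any active instruments.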
+ mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + + // Create a context to control ownership of the pipeline hardware. + mca::Context MCA(*MRI, *STI); + + mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth, + RegisterFileSize, LoadQueueSize, StoreQueueSize, + AssumeNoAlias, EnableBottleneckAnalysis); + + // Number each region in the sequence. + unsigned RegionIdx = 0; + + std::unique_ptr<MCCodeEmitter> MCE( + TheTarget->createMCCodeEmitter(*MCII, Ctx)); + assert(MCE && "Unable to create code emitter!"); + + std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend( + *STI, *MRI, mc::InitMCTargetOptionsFromFlags())); + assert(MAB && "Unable to create asm backend!"); + + json::Object JSONOutput; + for (const std::unique_ptr<mca::AnalysisRegion> &Region : Regions) { + // Skip empty code regions. + if (Region->empty()) + continue; + + IB.clear(); + + // Lower the MCInst sequence into an mca::Instruction sequence. + ArrayRef<MCInst> Insts = Region->getInstructions(); + mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); + + IPP->resetState(); + + SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence; + for (const MCInst &MCI : Insts) { + SMLoc Loc = MCI.getLoc(); + const SmallVector<mca::SharedInstrument> Instruments = + InstrumentRegions.getActiveInstruments(Loc); + + Expected<std::unique_ptr<mca::Instruction>> Inst = + IB.createInstruction(MCI, Instruments); + if (!Inst) { + if (auto NewE = handleErrors( + Inst.takeError(), + [&IP, &STI](const mca::InstructionError<MCInst> &IE) { + std::string InstructionStr; + raw_string_ostream SS(InstructionStr); + WithColor::error() << IE.Message << '\n'; + IP->printInst(&IE.Inst, 0, "", *STI, SS); + SS.flush(); + WithColor::note() + << "instruction: " << InstructionStr << '\n'; + })) { + // Default case. + WithColor::error() << toString(std::move(NewE)); + } + return 1; + } + + IPP->postProcessInstruction(Inst.get(), MCI); + + LoweredSequence.emplace_back(std::move(Inst.get())); + } + + mca::CircularSourceMgr S(LoweredSequence, + PrintInstructionTables ? 1 : Iterations); + + if (PrintInstructionTables) { + // Create a pipeline, stages, and a printer. + auto P = std::make_unique<mca::Pipeline>(); + P->appendStage(std::make_unique<mca::EntryStage>(S)); + P->appendStage(std::make_unique<mca::InstructionTables>(SM)); + + mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO); + if (PrintJson) { + Printer.addView( + std::make_unique<mca::InstructionView>(*STI, *IP, Insts)); + } + + // Create the views for this pipeline, execute, and emit a report. + if (PrintInstructionInfoView) { + Printer.addView(std::make_unique<mca::InstructionInfoView>( + *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence, + ShowBarriers)); + } + Printer.addView( + std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts)); + + if (!runPipeline(*P)) + return 1; + + if (PrintJson) { + Printer.printReport(JSONOutput); + } else { + Printer.printReport(TOF->os()); + } + + ++RegionIdx; + continue; + } + + // Create the CustomBehaviour object for enforcing Target Specific + // behaviours and dependencies that aren't expressed well enough + // in the tablegen. CB cannot depend on the list of MCInst or + // the source code (but it can depend on the list of + // mca::Instruction or any objects that can be reconstructed + // from the target information). 
+ std::unique_ptr<mca::CustomBehaviour> CB; + if (!DisableCustomBehaviour) + CB = std::unique_ptr<mca::CustomBehaviour>( + TheTarget->createCustomBehaviour(*STI, S, *MCII)); + if (!CB) + // If the target doesn't have its own CB implemented (or the -disable-cb + // flag is set) then we use the base class (which does nothing). + CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII); + + // Create a basic pipeline simulating an out-of-order backend. + auto P = MCA.createDefaultPipeline(PO, S, *CB); + + mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO); + + // Targets can define their own custom Views that exist within their + // /lib/Target/ directory so that the View can utilize their CustomBehaviour + // or other backend symbols / functionality that are not already exposed + // through one of the MC-layer classes. These Views will be initialized + // using the CustomBehaviour::getViews() variants. + // If a target makes a custom View that does not depend on their target + // CB or their backend, they should put the View within + // /tools/llvm-mca/Views/ instead. + if (!DisableCustomBehaviour) { + std::vector<std::unique_ptr<mca::View>> CBViews = + CB->getStartViews(*IP, Insts); + for (auto &CBView : CBViews) + Printer.addView(std::move(CBView)); + } + + // When we output JSON, we add a view that contains the instructions + // and CPU resource information. + if (PrintJson) { + auto IV = std::make_unique<mca::InstructionView>(*STI, *IP, Insts); + Printer.addView(std::move(IV)); + } + + if (PrintSummaryView) + Printer.addView( + std::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth)); + + if (EnableBottleneckAnalysis) { + if (!IsOutOfOrder) { + WithColor::warning() + << "bottleneck analysis is not supported for in-order CPU '" << MCPU + << "'.\n"; + } + Printer.addView(std::make_unique<mca::BottleneckAnalysis>( + *STI, *IP, Insts, S.getNumIterations())); + } + + if (PrintInstructionInfoView) + Printer.addView(std::make_unique<mca::InstructionInfoView>( + *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence, + ShowBarriers)); + + // Fetch custom Views that are to be placed after the InstructionInfoView. + // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line + // for more info. + if (!DisableCustomBehaviour) { + std::vector<std::unique_ptr<mca::View>> CBViews = + CB->getPostInstrInfoViews(*IP, Insts); + for (auto &CBView : CBViews) + Printer.addView(std::move(CBView)); + } + + if (PrintDispatchStats) + Printer.addView(std::make_unique<mca::DispatchStatistics>()); + + if (PrintSchedulerStats) + Printer.addView(std::make_unique<mca::SchedulerStatistics>(*STI)); + + if (PrintRetireStats) + Printer.addView(std::make_unique<mca::RetireControlUnitStatistics>(SM)); + + if (PrintRegisterFileStats) + Printer.addView(std::make_unique<mca::RegisterFileStatistics>(*STI)); + + if (PrintResourcePressureView) + Printer.addView( + std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts)); + + if (PrintTimelineView) { + unsigned TimelineIterations = + TimelineMaxIterations ? TimelineMaxIterations : 10; + Printer.addView(std::make_unique<mca::TimelineView>( + *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()), + TimelineMaxCycles)); + } + + // Fetch custom Views that are to be placed after all other Views. + // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line + // for more info. 
+ if (!DisableCustomBehaviour) { + std::vector<std::unique_ptr<mca::View>> CBViews = + CB->getEndViews(*IP, Insts); + for (auto &CBView : CBViews) + Printer.addView(std::move(CBView)); + } + + if (!runPipeline(*P)) + return 1; + + if (PrintJson) { + Printer.printReport(JSONOutput); + } else { + Printer.printReport(TOF->os()); + } + + ++RegionIdx; + } + + if (PrintJson) + TOF->os() << formatv("{0:2}", json::Value(std::move(JSONOutput))) << "\n"; + + TOF->keep(); + return 0; +} diff --git a/contrib/libs/llvm16/tools/llvm-mca/ya.make b/contrib/libs/llvm16/tools/llvm-mca/ya.make new file mode 100644 index 0000000000..db058b8c39 --- /dev/null +++ b/contrib/libs/llvm16/tools/llvm-mca/ya.make @@ -0,0 +1,101 @@ +# Generated by devtools/yamaker. + +PROGRAM() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm16 + contrib/libs/llvm16/lib/Analysis + contrib/libs/llvm16/lib/AsmParser + contrib/libs/llvm16/lib/BinaryFormat + contrib/libs/llvm16/lib/Bitcode/Reader + contrib/libs/llvm16/lib/Bitcode/Writer + contrib/libs/llvm16/lib/Bitstream/Reader + contrib/libs/llvm16/lib/CodeGen + contrib/libs/llvm16/lib/DebugInfo/CodeView + contrib/libs/llvm16/lib/DebugInfo/DWARF + contrib/libs/llvm16/lib/DebugInfo/MSF + contrib/libs/llvm16/lib/DebugInfo/PDB + contrib/libs/llvm16/lib/DebugInfo/Symbolize + contrib/libs/llvm16/lib/Demangle + contrib/libs/llvm16/lib/IR + contrib/libs/llvm16/lib/IRReader + contrib/libs/llvm16/lib/MC + contrib/libs/llvm16/lib/MC/MCDisassembler + contrib/libs/llvm16/lib/MC/MCParser + contrib/libs/llvm16/lib/MCA + contrib/libs/llvm16/lib/Object + contrib/libs/llvm16/lib/ProfileData + contrib/libs/llvm16/lib/Remarks + contrib/libs/llvm16/lib/Support + contrib/libs/llvm16/lib/Target + contrib/libs/llvm16/lib/Target/AArch64/AsmParser + contrib/libs/llvm16/lib/Target/AArch64/Disassembler + contrib/libs/llvm16/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm16/lib/Target/AArch64/TargetInfo + contrib/libs/llvm16/lib/Target/AArch64/Utils + contrib/libs/llvm16/lib/Target/ARM/AsmParser + contrib/libs/llvm16/lib/Target/ARM/Disassembler + contrib/libs/llvm16/lib/Target/ARM/MCTargetDesc + contrib/libs/llvm16/lib/Target/ARM/TargetInfo + contrib/libs/llvm16/lib/Target/ARM/Utils + contrib/libs/llvm16/lib/Target/BPF/AsmParser + contrib/libs/llvm16/lib/Target/BPF/Disassembler + contrib/libs/llvm16/lib/Target/BPF/MCTargetDesc + contrib/libs/llvm16/lib/Target/BPF/TargetInfo + contrib/libs/llvm16/lib/Target/LoongArch/AsmParser + contrib/libs/llvm16/lib/Target/LoongArch/Disassembler + contrib/libs/llvm16/lib/Target/LoongArch/MCTargetDesc + contrib/libs/llvm16/lib/Target/LoongArch/TargetInfo + contrib/libs/llvm16/lib/Target/NVPTX/MCTargetDesc + contrib/libs/llvm16/lib/Target/NVPTX/TargetInfo + contrib/libs/llvm16/lib/Target/PowerPC/AsmParser + contrib/libs/llvm16/lib/Target/PowerPC/Disassembler + contrib/libs/llvm16/lib/Target/PowerPC/MCTargetDesc + contrib/libs/llvm16/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm16/lib/Target/WebAssembly/AsmParser + contrib/libs/llvm16/lib/Target/WebAssembly/Disassembler + contrib/libs/llvm16/lib/Target/WebAssembly/MCTargetDesc + contrib/libs/llvm16/lib/Target/WebAssembly/TargetInfo + contrib/libs/llvm16/lib/Target/WebAssembly/Utils + contrib/libs/llvm16/lib/Target/X86/AsmParser + contrib/libs/llvm16/lib/Target/X86/Disassembler + contrib/libs/llvm16/lib/Target/X86/MCA + contrib/libs/llvm16/lib/Target/X86/MCTargetDesc + contrib/libs/llvm16/lib/Target/X86/TargetInfo + 
contrib/libs/llvm16/lib/TargetParser
+ contrib/libs/llvm16/lib/TextAPI
+ contrib/libs/llvm16/lib/Transforms/ObjCARC
+ contrib/libs/llvm16/lib/Transforms/Scalar
+ contrib/libs/llvm16/lib/Transforms/Utils
+)
+
+ADDINCL(
+ contrib/libs/llvm16/tools/llvm-mca
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+ CodeRegion.cpp
+ CodeRegionGenerator.cpp
+ PipelinePrinter.cpp
+ Views/BottleneckAnalysis.cpp
+ Views/DispatchStatistics.cpp
+ Views/InstructionInfoView.cpp
+ Views/InstructionView.cpp
+ Views/RegisterFileStatistics.cpp
+ Views/ResourcePressureView.cpp
+ Views/RetireControlUnitStatistics.cpp
+ Views/SchedulerStatistics.cpp
+ Views/SummaryView.cpp
+ Views/TimelineView.cpp
+ llvm-mca.cpp
+)
+
+END()
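
Note (not part of the patch): the driver above interleaves option handling, error reporting and report Views with the core simulation steps. The condensed sketch below only restates, against the LLVM 16 MCA headers vendored by this import, the essential flow llvm-mca.cpp performs per analysis region: lower MCInsts with mca::InstrBuilder, wrap the result in a mca::CircularSourceMgr, build the default out-of-order pipeline via mca::Context, and run it. The function name simulateRegion, its parameter list, the zeroed PipelineOptions (mirroring the tool's own option defaults) and the cycles-only output are illustrative assumptions; the real tool additionally honours instrument regions, target CustomBehaviour/InstrPostProcess hooks and the configured Views.

// Minimal sketch of the per-region simulation flow in llvm-mca.cpp above.
// The helper name and parameters are illustrative, not part of the tool.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/CustomBehaviour.h"
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Instruction.h"
#include "llvm/MCA/Pipeline.h"
#include "llvm/MCA/SourceMgr.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static Error simulateRegion(MCSubtargetInfo &STI, const MCInstrInfo &MCII,
                            MCRegisterInfo &MRI, const MCInstrAnalysis *MCIA,
                            ArrayRef<MCInst> Insts, unsigned Iterations,
                            raw_ostream &OS) {
  // Default instrument manager: no target-specific instruments are applied.
  mca::InstrumentManager IM(STI, MCII);
  mca::InstrBuilder IB(STI, MCII, MRI, MCIA, IM);

  // Lower every MCInst into an mca::Instruction, as the per-region loop
  // in llvm-mca.cpp does (here with an empty instrument list).
  const SmallVector<mca::SharedInstrument> NoInstruments;
  SmallVector<std::unique_ptr<mca::Instruction>> Lowered;
  for (const MCInst &MCI : Insts) {
    Expected<std::unique_ptr<mca::Instruction>> Inst =
        IB.createInstruction(MCI, NoInstruments);
    if (!Inst)
      return Inst.takeError();
    Lowered.emplace_back(std::move(*Inst));
  }

  // Replay the lowered sequence Iterations times through the default
  // out-of-order pipeline built from the subtarget's scheduling model,
  // using the base CustomBehaviour (which enforces nothing extra).
  mca::CircularSourceMgr S(Lowered, Iterations);
  mca::CustomBehaviour CB(STI, S, MCII);
  mca::Context MCA(MRI, STI);
  mca::PipelineOptions PO(/*MicroOpQueue=*/0, /*DecoderThroughput=*/0,
                          /*DispatchWidth=*/0, /*RegisterFileSize=*/0,
                          /*LoadQueueSize=*/0, /*StoreQueueSize=*/0,
                          /*AssumeNoAlias=*/true,
                          /*EnableBottleneckAnalysis=*/false);
  auto P = MCA.createDefaultPipeline(PO, S, CB);

  // Run the simulation and report only the simulated cycle count.
  Expected<unsigned> Cycles = P->run();
  if (!Cycles)
    return Cycles.takeError();
  OS << "simulated cycles: " << *Cycles << '\n';
  return Error::success();
}

In the tool itself these steps are repeated for every non-empty analysis region, and mca::PipelinePrinter attaches the requested Views (summary, timeline, resource pressure, dispatch and scheduler statistics, ...) to the pipeline before printing the report to the selected output file.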