diff options
author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/tools/llvm-exegesis/lib | |
parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz |
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/tools/llvm-exegesis/lib')
53 files changed, 8555 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/Target.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/Target.cpp new file mode 100644 index 0000000000..c778b89032 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -0,0 +1,76 @@ +//===-- Target.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../Target.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" + +namespace llvm { +namespace exegesis { + +static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) { + switch (RegBitWidth) { + case 32: + return AArch64::MOVi32imm; + case 64: + return AArch64::MOVi64imm; + } + llvm_unreachable("Invalid Value Width"); +} + +// Generates instruction to load an immediate value into a register. 
+static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth, + const APInt &Value) { + if (Value.getBitWidth() > RegBitWidth) + llvm_unreachable("Value must fit in the Register"); + return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth)) + .addReg(Reg) + .addImm(Value.getZExtValue()); +} + +#include "AArch64GenExegesis.inc" + +namespace { + +class ExegesisAArch64Target : public ExegesisTarget { +public: + ExegesisAArch64Target() : ExegesisTarget(AArch64CpuPfmCounters) {} + +private: + std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, + const APInt &Value) const override { + if (AArch64::GPR32RegClass.contains(Reg)) + return {loadImmediate(Reg, 32, Value)}; + if (AArch64::GPR64RegClass.contains(Reg)) + return {loadImmediate(Reg, 64, Value)}; + errs() << "setRegTo is not implemented, results will be unreliable\n"; + return {}; + } + + bool matchesArch(Triple::ArchType Arch) const override { + return Arch == Triple::aarch64 || Arch == Triple::aarch64_be; + } + + void addTargetSpecificPasses(PassManagerBase &PM) const override { + // Function return is a pseudo-instruction that needs to be expanded + PM.add(createAArch64ExpandPseudoPass()); + } +}; + +} // namespace + +static ExegesisTarget *getTheExegesisAArch64Target() { + static ExegesisAArch64Target Target; + return &Target; +} + +void InitializeAArch64ExegesisTarget() { + ExegesisTarget::registerTarget(getTheExegesisAArch64Target()); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/ya.make b/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/ya.make new file mode 100644 index 0000000000..084a26238e --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64/ya.make @@ -0,0 +1,37 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/include + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/Support + contrib/libs/llvm14/lib/Target/AArch64 + contrib/libs/llvm14/lib/Target/AArch64/AsmParser + contrib/libs/llvm14/lib/Target/AArch64/Disassembler + contrib/libs/llvm14/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm14/lib/Target/AArch64/TargetInfo + contrib/libs/llvm14/lib/Target/AArch64/Utils + contrib/libs/llvm14/tools/llvm-exegesis/lib +) + +ADDINCL( + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm14/lib/Target/AArch64 + contrib/libs/llvm14/lib/Target/AArch64 + contrib/libs/llvm14/tools/llvm-exegesis/lib/AArch64 +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Target.cpp +) + +END() diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.cpp new file mode 100644 index 0000000000..b12f872a28 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.cpp @@ -0,0 +1,608 @@ +//===-- Analysis.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Analysis.h" +#include "BenchmarkResult.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/FormatVariadic.h" +#include <limits> +#include <unordered_set> +#include <vector> + +namespace llvm { +namespace exegesis { + +static const char kCsvSep = ','; + +namespace { + +enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; + +template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S); + +template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) { + if (!llvm::is_contained(S, kCsvSep)) { + OS << S; + } else { + // Needs escaping. + OS << '"'; + for (const char C : S) { + if (C == '"') + OS << "\"\""; + else + OS << C; + } + OS << '"'; + } +} + +template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) { + for (const char C : S) { + if (C == '<') + OS << "<"; + else if (C == '>') + OS << ">"; + else if (C == '&') + OS << "&"; + else + OS << C; + } +} + +template <> +void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) { + for (const char C : S) { + if (C == '"') + OS << "\\\""; + else + OS << C; + } +} + +} // namespace + +template <EscapeTag Tag> +static void +writeClusterId(raw_ostream &OS, + const InstructionBenchmarkClustering::ClusterId &CID) { + if (CID.isNoise()) + writeEscaped<Tag>(OS, "[noise]"); + else if (CID.isError()) + writeEscaped<Tag>(OS, "[error]"); + else + OS << CID.getId(); +} + +template <EscapeTag Tag> +static void writeMeasurementValue(raw_ostream &OS, const double Value) { + // Given Value, if we wanted to serialize it to a string, + // how many base-10 digits will we need to store, max? + static constexpr auto MaxDigitCount = + std::numeric_limits<decltype(Value)>::max_digits10; + // Also, we will need a decimal separator. 
+ static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. + // So how long of a string will the serialization produce, max? + static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; + + // WARNING: when changing the format, also adjust the small-size estimate ^. + static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); + + writeEscaped<Tag>( + OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); +} + +template <typename EscapeTag, EscapeTag Tag> +void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes, + const char *Separator) const { + SmallVector<std::string, 3> Lines; + // Parse the asm snippet and print it. + while (!Bytes.empty()) { + MCInst MI; + uint64_t MISize = 0; + if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, nulls())) { + writeEscaped<Tag>(OS, join(Lines, Separator)); + writeEscaped<Tag>(OS, Separator); + writeEscaped<Tag>(OS, "[error decoding asm snippet]"); + return; + } + SmallString<128> InstPrinterStr; // FIXME: magic number. + raw_svector_ostream OSS(InstPrinterStr); + InstPrinter_->printInst(&MI, 0, "", *SubtargetInfo_, OSS); + Bytes = Bytes.drop_front(MISize); + Lines.emplace_back(InstPrinterStr.str().trim()); + } + writeEscaped<Tag>(OS, join(Lines, Separator)); +} + +// Prints a row representing an instruction, along with scheduling info and +// point coordinates (measurements). 
+void Analysis::printInstructionRowCsv(const size_t PointId, + raw_ostream &OS) const { + const InstructionBenchmark &Point = Clustering_.getPoints()[PointId]; + writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId)); + OS << kCsvSep; + writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; "); + OS << kCsvSep; + writeEscaped<kEscapeCsv>(OS, Point.Key.Config); + OS << kCsvSep; + assert(!Point.Key.Instructions.empty()); + const MCInst &MCI = Point.keyInstruction(); + unsigned SchedClassId; + std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( + *SubtargetInfo_, *InstrInfo_, MCI); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + const MCSchedClassDesc *const SCDesc = + SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId); + writeEscaped<kEscapeCsv>(OS, SCDesc->Name); +#else + OS << SchedClassId; +#endif + for (const auto &Measurement : Point.Measurements) { + OS << kCsvSep; + writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); + } + OS << "\n"; +} + +Analysis::Analysis(const Target &Target, + std::unique_ptr<MCSubtargetInfo> SubtargetInfo, + std::unique_ptr<MCInstrInfo> InstrInfo, + const InstructionBenchmarkClustering &Clustering, + double AnalysisInconsistencyEpsilon, + bool AnalysisDisplayUnstableOpcodes, + const std::string &ForceCpuName) + : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)), + InstrInfo_(std::move(InstrInfo)), + AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * + AnalysisInconsistencyEpsilon), + AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { + if (Clustering.getPoints().empty()) + return; + + const InstructionBenchmark &FirstPoint = Clustering.getPoints().front(); + const std::string CpuName = + ForceCpuName.empty() ? 
FirstPoint.CpuName : ForceCpuName; + RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple)); + MCTargetOptions MCOptions; + AsmInfo_.reset( + Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple, MCOptions)); + SubtargetInfo_.reset( + Target.createMCSubtargetInfo(FirstPoint.LLVMTriple, CpuName, "")); + InstPrinter_.reset(Target.createMCInstPrinter( + Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_, + *InstrInfo_, *RegInfo_)); + + Context_ = + std::make_unique<MCContext>(Triple(FirstPoint.LLVMTriple), AsmInfo_.get(), + RegInfo_.get(), SubtargetInfo_.get()); + Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_)); + assert(Disasm_ && "cannot create MCDisassembler. missing call to " + "InitializeXXXTargetDisassembler ?"); +} + +template <> +Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const { + if (Clustering_.getPoints().empty()) + return Error::success(); + + // Write the header. + OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" + << kCsvSep << "sched_class"; + for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { + OS << kCsvSep; + writeEscaped<kEscapeCsv>(OS, Measurement.Key); + } + OS << "\n"; + + // Write the points. + for (const auto &ClusterIt : Clustering_.getValidClusters()) { + for (const size_t PointId : ClusterIt.PointIndices) { + printInstructionRowCsv(PointId, OS); + } + OS << "\n\n"; + } + return Error::success(); +} + +Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( + ResolvedSchedClass &&RSC) + : RSC(std::move(RSC)) {} + +std::vector<Analysis::ResolvedSchedClassAndPoints> +Analysis::makePointsPerSchedClass() const { + std::vector<ResolvedSchedClassAndPoints> Entries; + // Maps SchedClassIds to index in result. 
+ std::unordered_map<unsigned, size_t> SchedClassIdToIndex; + const auto &Points = Clustering_.getPoints(); + for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { + const InstructionBenchmark &Point = Points[PointId]; + if (!Point.Error.empty()) + continue; + assert(!Point.Key.Instructions.empty()); + // FIXME: we should be using the tuple of classes for instructions in the + // snippet as key. + const MCInst &MCI = Point.keyInstruction(); + unsigned SchedClassId; + bool WasVariant; + std::tie(SchedClassId, WasVariant) = + ResolvedSchedClass::resolveSchedClassId(*SubtargetInfo_, *InstrInfo_, + MCI); + const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); + if (IndexIt == SchedClassIdToIndex.end()) { + // Create a new entry. + SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); + ResolvedSchedClassAndPoints Entry( + ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant)); + Entry.PointIds.push_back(PointId); + Entries.push_back(std::move(Entry)); + } else { + // Append to the existing entry. + Entries[IndexIt->second].PointIds.push_back(PointId); + } + } + return Entries; +} + +// Parallel benchmarks repeat the same opcode multiple times. Just show this +// opcode and show the whole snippet only on hover. +static void writeParallelSnippetHtml(raw_ostream &OS, + const std::vector<MCInst> &Instructions, + const MCInstrInfo &InstrInfo) { + if (Instructions.empty()) + return; + writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode())); + if (Instructions.size() > 1) + OS << " (x" << Instructions.size() << ")"; +} + +// Latency tries to find a serial path. Just show the opcode path and show the +// whole snippet only on hover. 
+static void writeLatencySnippetHtml(raw_ostream &OS, + const std::vector<MCInst> &Instructions, + const MCInstrInfo &InstrInfo) { + bool First = true; + for (const MCInst &Instr : Instructions) { + if (First) + First = false; + else + OS << " → "; + writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode())); + } +} + +void Analysis::printPointHtml(const InstructionBenchmark &Point, + llvm::raw_ostream &OS) const { + OS << "<li><span class=\"mono\" title=\""; + writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n"); + OS << "\">"; + switch (Point.Mode) { + case InstructionBenchmark::Latency: + writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + case InstructionBenchmark::Uops: + case InstructionBenchmark::InverseThroughput: + writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + default: + llvm_unreachable("invalid mode"); + } + OS << "</span> <span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, Point.Key.Config); + OS << "</span></li>"; +} + +void Analysis::printSchedClassClustersHtml( + const std::vector<SchedClassCluster> &Clusters, + const ResolvedSchedClass &RSC, raw_ostream &OS) const { + const auto &Points = Clustering_.getPoints(); + OS << "<table class=\"sched-class-clusters\">"; + OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; + assert(!Clusters.empty()); + for (const auto &Measurement : + Points[Clusters[0].getPointIds()[0]].Measurements) { + OS << "<th>"; + writeEscaped<kEscapeHtml>(OS, Measurement.Key); + OS << "</th>"; + } + OS << "</tr>"; + for (const SchedClassCluster &Cluster : Clusters) { + OS << "<tr class=\"" + << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_, + AnalysisInconsistencyEpsilonSquared_) + ? 
"good-cluster" + : "bad-cluster") + << "\"><td>"; + writeClusterId<kEscapeHtml>(OS, Cluster.id()); + OS << "</td><td><ul>"; + for (const size_t PointId : Cluster.getPointIds()) { + printPointHtml(Points[PointId], OS); + } + OS << "</ul></td>"; + for (const auto &Stats : Cluster.getCentroid().getStats()) { + OS << "<td class=\"measurement\">"; + writeMeasurementValue<kEscapeHtml>(OS, Stats.avg()); + OS << "<br><span class=\"minmax\">["; + writeMeasurementValue<kEscapeHtml>(OS, Stats.min()); + OS << ";"; + writeMeasurementValue<kEscapeHtml>(OS, Stats.max()); + OS << "]</span></td>"; + } + OS << "</tr>"; + } + OS << "</table>"; +} + +void Analysis::SchedClassCluster::addPoint( + size_t PointId, const InstructionBenchmarkClustering &Clustering) { + PointIds.push_back(PointId); + const auto &Point = Clustering.getPoints()[PointId]; + if (ClusterId.isUndef()) + ClusterId = Clustering.getClusterIdForPoint(PointId); + assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); + + Centroid.addPoint(Point.Measurements); +} + +bool Analysis::SchedClassCluster::measurementsMatch( + const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, + const InstructionBenchmarkClustering &Clustering, + const double AnalysisInconsistencyEpsilonSquared_) const { + assert(!Clustering.getPoints().empty()); + const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode; + + if (!Centroid.validate(Mode)) + return false; + + const std::vector<BenchmarkMeasure> ClusterCenterPoint = + Centroid.getAsPoint(); + + const std::vector<BenchmarkMeasure> SchedClassPoint = + RSC.getAsPoint(Mode, STI, Centroid.getStats()); + if (SchedClassPoint.empty()) + return false; // In Uops mode validate() may not be enough. 
+ + assert(ClusterCenterPoint.size() == SchedClassPoint.size() && + "Expected measured/sched data dimensions to match."); + + return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, + AnalysisInconsistencyEpsilonSquared_); +} + +void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, + raw_ostream &OS) const { + OS << "<table class=\"sched-class-desc\">"; + OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" + "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the " + "idealized unit resource (port) pressure assuming ideal " + "distribution\">Idealized Resource Pressure</th></tr>"; + if (RSC.SCDesc->isValid()) { + const auto &SM = SubtargetInfo_->getSchedModel(); + OS << "<tr><td>✔</td>"; + OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; + OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; + // Latencies. + OS << "<td><ul>"; + for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { + const auto *const Entry = + SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I); + OS << "<li>" << Entry->Cycles; + if (RSC.SCDesc->NumWriteLatencyEntries > 1) { + // Dismabiguate if more than 1 latency. + OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; + } + OS << "</li>"; + } + OS << "</ul></td>"; + // inverse throughput. + OS << "<td>"; + writeMeasurementValue<kEscapeHtml>( + OS, + MCSchedModel::getReciprocalThroughput(*SubtargetInfo_, *RSC.SCDesc)); + OS << "</td>"; + // WriteProcRes. + OS << "<td><ul>"; + for (const auto &WPR : RSC.NonRedundantWriteProcRes) { + OS << "<li><span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, + SM.getProcResource(WPR.ProcResourceIdx)->Name); + OS << "</span>: " << WPR.Cycles << "</li>"; + } + OS << "</ul></td>"; + // Idealized port pressure. 
+ OS << "<td><ul>"; + for (const auto &Pressure : RSC.IdealizedProcResPressure) { + OS << "<li><span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel() + .getProcResource(Pressure.first) + ->Name); + OS << "</span>: "; + writeMeasurementValue<kEscapeHtml>(OS, Pressure.second); + OS << "</li>"; + } + OS << "</ul></td>"; + OS << "</tr>"; + } else { + OS << "<tr><td>✕</td><td></td><td></td></tr>"; + } + OS << "</table>"; +} + +void Analysis::printClusterRawHtml( + const InstructionBenchmarkClustering::ClusterId &Id, StringRef display_name, + llvm::raw_ostream &OS) const { + const auto &Points = Clustering_.getPoints(); + const auto &Cluster = Clustering_.getCluster(Id); + if (Cluster.PointIndices.empty()) + return; + + OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster (" + << Cluster.PointIndices.size() << " points)</p>"; + OS << "<table class=\"sched-class-clusters\">"; + // Table Header. + OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; + for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { + OS << "<th>"; + writeEscaped<kEscapeHtml>(OS, Measurement.Key); + OS << "</th>"; + } + OS << "</tr>"; + + // Point data. 
+ for (const auto &PointId : Cluster.PointIndices) { + OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>"; + printPointHtml(Points[PointId], OS); + OS << "</ul></td>"; + for (const auto &Measurement : Points[PointId].Measurements) { + OS << "<td class=\"measurement\">"; + writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue); + } + OS << "</tr>"; + } + OS << "</table>"; + + OS << "</div>"; + +} // namespace exegesis + +static constexpr const char kHtmlHead[] = R"( +<head> +<title>llvm-exegesis Analysis Results</title> +<style> +body { + font-family: sans-serif +} +span.sched-class-name { + font-weight: bold; + font-family: monospace; +} +span.opcode { + font-family: monospace; +} +span.config { + font-family: monospace; +} +div.inconsistency { + margin-top: 50px; +} +table { + margin-left: 50px; + border-collapse: collapse; +} +table, table tr,td,th { + border: 1px solid #444; +} +table ul { + padding-left: 0px; + margin: 0px; + list-style-type: none; +} +table.sched-class-clusters td { + padding-left: 10px; + padding-right: 10px; + padding-top: 10px; + padding-bottom: 10px; +} +table.sched-class-desc td { + padding-left: 10px; + padding-right: 10px; + padding-top: 2px; + padding-bottom: 2px; +} +span.mono { + font-family: monospace; +} +td.measurement { + text-align: center; +} +tr.good-cluster td.measurement { + color: #292 +} +tr.bad-cluster td.measurement { + color: #922 +} +tr.good-cluster td.measurement span.minmax { + color: #888; +} +tr.bad-cluster td.measurement span.minmax { + color: #888; +} +</style> +</head> +)"; + +template <> +Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( + raw_ostream &OS) const { + const auto &FirstPoint = Clustering_.getPoints()[0]; + // Print the header. 
+ OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>"; + OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>"; + OS << "<h3>Triple: <span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple); + OS << "</span></h3><h3>Cpu: <span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); + OS << "</span></h3>"; + + for (const auto &RSCAndPoints : makePointsPerSchedClass()) { + if (!RSCAndPoints.RSC.SCDesc) + continue; + // Bucket sched class points into sched class clusters. + std::vector<SchedClassCluster> SchedClassClusters; + for (const size_t PointId : RSCAndPoints.PointIds) { + const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); + if (!ClusterId.isValid()) + continue; // Ignore noise and errors. FIXME: take noise into account ? + if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) + continue; // Either display stable or unstable clusters only. + auto SchedClassClusterIt = llvm::find_if( + SchedClassClusters, [ClusterId](const SchedClassCluster &C) { + return C.id() == ClusterId; + }); + if (SchedClassClusterIt == SchedClassClusters.end()) { + SchedClassClusters.emplace_back(); + SchedClassClusterIt = std::prev(SchedClassClusters.end()); + } + SchedClassClusterIt->addPoint(PointId, Clustering_); + } + + // Print any scheduling class that has at least one cluster that does not + // match the checked-in data. + if (all_of(SchedClassClusters, [this, + &RSCAndPoints](const SchedClassCluster &C) { + return C.measurementsMatch(*SubtargetInfo_, RSCAndPoints.RSC, + Clustering_, + AnalysisInconsistencyEpsilonSquared_); + })) + continue; // Nothing weird. 
+ + OS << "<div class=\"inconsistency\"><p>Sched Class <span " + "class=\"sched-class-name\">"; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); +#else + OS << RSCAndPoints.RSC.SchedClassId; +#endif + OS << "</span> contains instructions whose performance characteristics do" + " not match that of LLVM:</p>"; + printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); + OS << "<p>llvm SchedModel data:</p>"; + printSchedClassDescHtml(RSCAndPoints.RSC, OS); + OS << "</div>"; + } + + printClusterRawHtml(InstructionBenchmarkClustering::ClusterId::noise(), + "[noise]", OS); + + OS << "</body></html>"; + return Error::success(); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.h new file mode 100644 index 0000000000..b6746bed80 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Analysis.h @@ -0,0 +1,130 @@ +//===-- Analysis.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Analysis output for benchmark results. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H +#define LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H + +#include "Clustering.h" +#include "SchedClassResolution.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <set> +#include <string> +#include <unordered_map> + +namespace llvm { +namespace exegesis { + +// A helper class to analyze benchmark results for a target. +class Analysis { +public: + Analysis(const Target &Target, std::unique_ptr<MCSubtargetInfo> SubtargetInfo, + std::unique_ptr<MCInstrInfo> InstrInfo, + const InstructionBenchmarkClustering &Clustering, + double AnalysisInconsistencyEpsilon, + bool AnalysisDisplayUnstableOpcodes, + const std::string &ForceCpuName = ""); + + // Prints a csv of instructions for each cluster. + struct PrintClusters {}; + // Find potential errors in the scheduling information given measurements. + struct PrintSchedClassInconsistencies {}; + + template <typename Pass> Error run(raw_ostream &OS) const; + +private: + using ClusterId = InstructionBenchmarkClustering::ClusterId; + + // Represents the intersection of a sched class and a cluster. + class SchedClassCluster { + public: + const InstructionBenchmarkClustering::ClusterId &id() const { + return ClusterId; + } + + const std::vector<size_t> &getPointIds() const { return PointIds; } + + void addPoint(size_t PointId, + const InstructionBenchmarkClustering &Clustering); + + // Return the cluster centroid. + const SchedClassClusterCentroid &getCentroid() const { return Centroid; } + + // Returns true if the cluster representative measurements match that of SC. 
+ bool + measurementsMatch(const MCSubtargetInfo &STI, const ResolvedSchedClass &SC, + const InstructionBenchmarkClustering &Clustering, + const double AnalysisInconsistencyEpsilonSquared_) const; + + private: + InstructionBenchmarkClustering::ClusterId ClusterId; + std::vector<size_t> PointIds; + // Measurement stats for the points in the SchedClassCluster. + SchedClassClusterCentroid Centroid; + }; + + void printInstructionRowCsv(size_t PointId, raw_ostream &OS) const; + + void printClusterRawHtml(const InstructionBenchmarkClustering::ClusterId &Id, + StringRef display_name, llvm::raw_ostream &OS) const; + + void printPointHtml(const InstructionBenchmark &Point, + llvm::raw_ostream &OS) const; + + void + printSchedClassClustersHtml(const std::vector<SchedClassCluster> &Clusters, + const ResolvedSchedClass &SC, + raw_ostream &OS) const; + void printSchedClassDescHtml(const ResolvedSchedClass &SC, + raw_ostream &OS) const; + + // A pair of (Sched Class, indices of points that belong to the sched + // class). + struct ResolvedSchedClassAndPoints { + explicit ResolvedSchedClassAndPoints(ResolvedSchedClass &&RSC); + + ResolvedSchedClass RSC; + std::vector<size_t> PointIds; + }; + + // Builds a list of ResolvedSchedClassAndPoints. 
+ std::vector<ResolvedSchedClassAndPoints> makePointsPerSchedClass() const; + + template <typename EscapeTag, EscapeTag Tag> + void writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes, + const char *Separator) const; + + const InstructionBenchmarkClustering &Clustering_; + std::unique_ptr<MCContext> Context_; + std::unique_ptr<MCSubtargetInfo> SubtargetInfo_; + std::unique_ptr<MCInstrInfo> InstrInfo_; + std::unique_ptr<MCRegisterInfo> RegInfo_; + std::unique_ptr<MCAsmInfo> AsmInfo_; + std::unique_ptr<MCInstPrinter> InstPrinter_; + std::unique_ptr<MCDisassembler> Disasm_; + const double AnalysisInconsistencyEpsilonSquared_; + const bool AnalysisDisplayUnstableOpcodes_; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.cpp new file mode 100644 index 0000000000..84fd9295c7 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.cpp @@ -0,0 +1,326 @@ +//===-- Assembler.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Assembler.h" + +#include "SnippetRepetitor.h" +#include "Target.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { +namespace exegesis { + +static constexpr const char ModuleID[] = "ExegesisInfoTest"; +static constexpr const char FunctionID[] = "foo"; +static const Align kFunctionAlignment(4096); + +// Fills the given basic block with register setup code, and returns true if +// all registers could be setup correctly. +static bool generateSnippetSetupCode( + const ExegesisTarget &ET, const MCSubtargetInfo *const MSI, + ArrayRef<RegisterValue> RegisterInitialValues, BasicBlockFiller &BBF) { + bool IsSnippetSetupComplete = true; + for (const RegisterValue &RV : RegisterInitialValues) { + // Load a constant in the register. + const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); + if (SetRegisterCode.empty()) + IsSnippetSetupComplete = false; + BBF.addInstructions(SetRegisterCode); + } + return IsSnippetSetupComplete; +} + +// Small utility function to add named passes. 
+static bool addPass(PassManagerBase &PM, StringRef PassName, + TargetPassConfig &TPC) { + const PassRegistry *PR = PassRegistry::getPassRegistry(); + const PassInfo *PI = PR->getPassInfo(PassName); + if (!PI) { + errs() << " run-pass " << PassName << " is not registered.\n"; + return true; + } + + if (!PI->getNormalCtor()) { + errs() << " cannot create pass: " << PI->getPassName() << "\n"; + return true; + } + Pass *P = PI->getNormalCtor()(); + std::string Banner = std::string("After ") + std::string(P->getPassName()); + PM.add(P); + TPC.printAndVerify(Banner); + + return false; +} + +MachineFunction &createVoidVoidPtrMachineFunction(StringRef FunctionName, + Module *Module, + MachineModuleInfo *MMI) { + Type *const ReturnType = Type::getInt32Ty(Module->getContext()); + Type *const MemParamType = PointerType::get( + Type::getInt8Ty(Module->getContext()), 0 /*default address space*/); + FunctionType *FunctionType = + FunctionType::get(ReturnType, {MemParamType}, false); + Function *const F = Function::Create( + FunctionType, GlobalValue::InternalLinkage, FunctionName, Module); + // Making sure we can create a MachineFunction out of this Function even if it + // contains no IR. 
+ F->setIsMaterializable(true); + return MMI->getOrCreateMachineFunction(*F); +} + +BasicBlockFiller::BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB, + const MCInstrInfo *MCII) + : MF(MF), MBB(MBB), MCII(MCII) {} + +void BasicBlockFiller::addInstruction(const MCInst &Inst, const DebugLoc &DL) { + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &MCID = MCII->get(Opcode); + MachineInstrBuilder Builder = BuildMI(MBB, DL, MCID); + for (unsigned OpIndex = 0, E = Inst.getNumOperands(); OpIndex < E; + ++OpIndex) { + const MCOperand &Op = Inst.getOperand(OpIndex); + if (Op.isReg()) { + const bool IsDef = OpIndex < MCID.getNumDefs(); + unsigned Flags = 0; + const MCOperandInfo &OpInfo = MCID.operands().begin()[OpIndex]; + if (IsDef && !OpInfo.isOptionalDef()) + Flags |= RegState::Define; + Builder.addReg(Op.getReg(), Flags); + } else if (Op.isImm()) { + Builder.addImm(Op.getImm()); + } else if (!Op.isValid()) { + llvm_unreachable("Operand is not set"); + } else { + llvm_unreachable("Not yet implemented"); + } + } +} + +void BasicBlockFiller::addInstructions(ArrayRef<MCInst> Insts, + const DebugLoc &DL) { + for (const MCInst &Inst : Insts) + addInstruction(Inst, DL); +} + +void BasicBlockFiller::addReturn(const DebugLoc &DL) { + // Insert the return code. 
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + if (TII->getReturnOpcode() < TII->getNumOpcodes()) { + BuildMI(MBB, DL, TII->get(TII->getReturnOpcode())); + } else { + MachineIRBuilder MIB(MF); + MIB.setMBB(*MBB); + + FunctionLoweringInfo FuncInfo; + FuncInfo.CanLowerReturn = true; + MF.getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {}, + FuncInfo); + } +} + +FunctionFiller::FunctionFiller(MachineFunction &MF, + std::vector<unsigned> RegistersSetUp) + : MF(MF), MCII(MF.getTarget().getMCInstrInfo()), Entry(addBasicBlock()), + RegistersSetUp(std::move(RegistersSetUp)) {} + +BasicBlockFiller FunctionFiller::addBasicBlock() { + MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); + MF.push_back(MBB); + return BasicBlockFiller(MF, MBB, MCII); +} + +ArrayRef<unsigned> FunctionFiller::getRegistersSetUp() const { + return RegistersSetUp; +} + +static std::unique_ptr<Module> +createModule(const std::unique_ptr<LLVMContext> &Context, const DataLayout &DL) { + auto Mod = std::make_unique<Module>(ModuleID, *Context); + Mod->setDataLayout(DL); + return Mod; +} + +BitVector getFunctionReservedRegs(const TargetMachine &TM) { + std::unique_ptr<LLVMContext> Context = std::make_unique<LLVMContext>(); + std::unique_ptr<Module> Module = createModule(Context, TM.createDataLayout()); + // TODO: This only works for targets implementing LLVMTargetMachine. + const LLVMTargetMachine &LLVMTM = static_cast<const LLVMTargetMachine &>(TM); + std::unique_ptr<MachineModuleInfoWrapperPass> MMIWP = + std::make_unique<MachineModuleInfoWrapperPass>(&LLVMTM); + MachineFunction &MF = createVoidVoidPtrMachineFunction( + FunctionID, Module.get(), &MMIWP.get()->getMMI()); + // Saving reserved registers for client. 
+ return MF.getSubtarget().getRegisterInfo()->getReservedRegs(MF); +} + +Error assembleToStream(const ExegesisTarget &ET, + std::unique_ptr<LLVMTargetMachine> TM, + ArrayRef<unsigned> LiveIns, + ArrayRef<RegisterValue> RegisterInitialValues, + const FillFunction &Fill, raw_pwrite_stream &AsmStream) { + auto Context = std::make_unique<LLVMContext>(); + std::unique_ptr<Module> Module = + createModule(Context, TM->createDataLayout()); + auto MMIWP = std::make_unique<MachineModuleInfoWrapperPass>(TM.get()); + MachineFunction &MF = createVoidVoidPtrMachineFunction( + FunctionID, Module.get(), &MMIWP.get()->getMMI()); + MF.ensureAlignment(kFunctionAlignment); + + // We need to instruct the passes that we're done with SSA and virtual + // registers. + auto &Properties = MF.getProperties(); + Properties.set(MachineFunctionProperties::Property::NoVRegs); + Properties.reset(MachineFunctionProperties::Property::IsSSA); + Properties.set(MachineFunctionProperties::Property::NoPHIs); + + for (const unsigned Reg : LiveIns) + MF.getRegInfo().addLiveIn(Reg); + + std::vector<unsigned> RegistersSetUp; + for (const auto &InitValue : RegisterInitialValues) { + RegistersSetUp.push_back(InitValue.Register); + } + FunctionFiller Sink(MF, std::move(RegistersSetUp)); + auto Entry = Sink.getEntry(); + for (const unsigned Reg : LiveIns) + Entry.MBB->addLiveIn(Reg); + + const bool IsSnippetSetupComplete = generateSnippetSetupCode( + ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry); + + // If the snippet setup is not complete, we disable liveliness tracking. This + // means that we won't know what values are in the registers. + if (!IsSnippetSetupComplete) + Properties.reset(MachineFunctionProperties::Property::TracksLiveness); + + Fill(Sink); + + // prologue/epilogue pass needs the reserved registers to be frozen, this + // is usually done by the SelectionDAGISel pass. + MF.getRegInfo().freezeReservedRegs(MF); + + // We create the pass manager, run the passes to populate AsmBuffer. 
+ MCContext &MCContext = MMIWP->getMMI().getContext(); + legacy::PassManager PM; + + TargetLibraryInfoImpl TLII(Triple(Module->getTargetTriple())); + PM.add(new TargetLibraryInfoWrapperPass(TLII)); + + TargetPassConfig *TPC = TM->createPassConfig(PM); + PM.add(TPC); + PM.add(MMIWP.release()); + TPC->printAndVerify("MachineFunctionGenerator::assemble"); + // Add target-specific passes. + ET.addTargetSpecificPasses(PM); + TPC->printAndVerify("After ExegesisTarget::addTargetSpecificPasses"); + // Adding the following passes: + // - postrapseudos: expands pseudo return instructions used on some targets. + // - machineverifier: checks that the MachineFunction is well formed. + // - prologepilog: saves and restore callee saved registers. + for (const char *PassName : + {"postrapseudos", "machineverifier", "prologepilog"}) + if (addPass(PM, PassName, *TPC)) + return make_error<Failure>("Unable to add a mandatory pass"); + TPC->setInitialized(); + + // AsmPrinter is responsible for generating the assembly into AsmBuffer. + if (TM->addAsmPrinter(PM, AsmStream, nullptr, CGFT_ObjectFile, MCContext)) + return make_error<Failure>("Cannot add AsmPrinter passes"); + + PM.run(*Module); // Run all the passes + return Error::success(); +} + +object::OwningBinary<object::ObjectFile> +getObjectFromBuffer(StringRef InputData) { + // Storing the generated assembly into a MemoryBuffer that owns the memory. + std::unique_ptr<MemoryBuffer> Buffer = + MemoryBuffer::getMemBufferCopy(InputData); + // Create the ObjectFile from the MemoryBuffer. + std::unique_ptr<object::ObjectFile> Obj = + cantFail(object::ObjectFile::createObjectFile(Buffer->getMemBufferRef())); + // Returning both the MemoryBuffer and the ObjectFile. 
+ return object::OwningBinary<object::ObjectFile>(std::move(Obj), + std::move(Buffer)); +} + +object::OwningBinary<object::ObjectFile> getObjectFromFile(StringRef Filename) { + return cantFail(object::ObjectFile::createObjectFile(Filename)); +} + +namespace { + +// Implementation of this class relies on the fact that a single object with a +// single function will be loaded into memory. +class TrackingSectionMemoryManager : public SectionMemoryManager { +public: + explicit TrackingSectionMemoryManager(uintptr_t *CodeSize) + : CodeSize(CodeSize) {} + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + *CodeSize = Size; + return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID, + SectionName); + } + +private: + uintptr_t *const CodeSize = nullptr; +}; + +} // namespace + +ExecutableFunction::ExecutableFunction( + std::unique_ptr<LLVMTargetMachine> TM, + object::OwningBinary<object::ObjectFile> &&ObjectFileHolder) + : Context(std::make_unique<LLVMContext>()) { + assert(ObjectFileHolder.getBinary() && "cannot create object file"); + // Initializing the execution engine. + // We need to use the JIT EngineKind to be able to add an object file. + LLVMLinkInMCJIT(); + uintptr_t CodeSize = 0; + std::string Error; + ExecEngine.reset( + EngineBuilder(createModule(Context, TM->createDataLayout())) + .setErrorStr(&Error) + .setMCPU(TM->getTargetCPU()) + .setEngineKind(EngineKind::JIT) + .setMCJITMemoryManager( + std::make_unique<TrackingSectionMemoryManager>(&CodeSize)) + .create(TM.release())); + if (!ExecEngine) + report_fatal_error(Twine(Error)); + // Adding the generated object file containing the assembled function. + // The ExecutionEngine makes sure the object file is copied into an + // executable page. + ExecEngine->addObjectFile(std::move(ObjectFileHolder)); + // Fetching function bytes. 
+ const uint64_t FunctionAddress = ExecEngine->getFunctionAddress(FunctionID); + assert(isAligned(kFunctionAlignment, FunctionAddress) && + "function is not properly aligned"); + FunctionBytes = + StringRef(reinterpret_cast<const char *>(FunctionAddress), CodeSize); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.h new file mode 100644 index 0000000000..2a83344b75 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Assembler.h @@ -0,0 +1,132 @@ +//===-- Assembler.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to assemble functions composed of a single basic block of +/// MCInsts. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ASSEMBLER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_ASSEMBLER_H + +#include <memory> + +#include "BenchmarkCode.h" +#include "Error.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +namespace exegesis { + +class ExegesisTarget; + +// Gather the set of reserved registers (depends on function's calling +// convention and target machine). 
+BitVector getFunctionReservedRegs(const TargetMachine &TM); + +// Helper to fill in a basic block. +class BasicBlockFiller { +public: + BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB, + const MCInstrInfo *MCII); + + void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc()); + void addInstructions(ArrayRef<MCInst> Insts, const DebugLoc &DL = DebugLoc()); + + void addReturn(const DebugLoc &DL = DebugLoc()); + + MachineFunction &MF; + MachineBasicBlock *const MBB; + const MCInstrInfo *const MCII; +}; + +// Helper to fill in a function. +class FunctionFiller { +public: + FunctionFiller(MachineFunction &MF, std::vector<unsigned> RegistersSetUp); + + // Adds a basic block to the function. + BasicBlockFiller addBasicBlock(); + + // Returns the function entry point. + BasicBlockFiller getEntry() { return Entry; } + + MachineFunction &MF; + const MCInstrInfo *const MCII; + + // Returns the set of registers in the snippet setup code. + ArrayRef<unsigned> getRegistersSetUp() const; + +private: + BasicBlockFiller Entry; + // The set of registers that are set up in the basic block. + std::vector<unsigned> RegistersSetUp; +}; + +// A callback that fills a function. +using FillFunction = std::function<void(FunctionFiller &)>; + +// Creates a temporary `void foo(char*)` function containing the provided +// Instructions. Runs a set of llvm Passes to provide correct prologue and +// epilogue. Once the MachineFunction is ready, it is assembled for TM to +// AsmStream, the temporary function is eventually discarded. +Error assembleToStream(const ExegesisTarget &ET, + std::unique_ptr<LLVMTargetMachine> TM, + ArrayRef<unsigned> LiveIns, + ArrayRef<RegisterValue> RegisterInitialValues, + const FillFunction &Fill, raw_pwrite_stream &AsmStream); + +// Creates an ObjectFile in the format understood by the host. +// Note: the resulting object keeps a copy of Buffer so it can be discarded once +// this function returns. 
+object::OwningBinary<object::ObjectFile> getObjectFromBuffer(StringRef Buffer);
+
+// Loads the content of Filename as an ObjectFile and returns it.
+object::OwningBinary<object::ObjectFile> getObjectFromFile(StringRef Filename);
+
+// Consumes an ObjectFile containing a `void foo(char*)` function and makes it
+// executable.
+struct ExecutableFunction {
+  explicit ExecutableFunction(
+      std::unique_ptr<LLVMTargetMachine> TM,
+      object::OwningBinary<object::ObjectFile> &&ObjectFileHolder);
+
+  // Retrieves the function as an array of bytes.
+  StringRef getFunctionBytes() const { return FunctionBytes; }
+
+  // Executes the function.
+  void operator()(char *Memory) const {
+    ((void (*)(char *))(intptr_t)FunctionBytes.data())(Memory);
+  }
+
+  std::unique_ptr<LLVMContext> Context;
+  std::unique_ptr<ExecutionEngine> ExecEngine;
+  StringRef FunctionBytes;
+};
+
+// Creates a void(int8*) MachineFunction.
+MachineFunction &createVoidVoidPtrMachineFunction(StringRef FunctionID,
+                                                  Module *Module,
+                                                  MachineModuleInfo *MMI);
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_ASSEMBLER_H
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkCode.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkCode.h
new file mode 100644
index 0000000000..7dceb25b50
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkCode.h
@@ -0,0 +1,35 @@
+//===-- BenchmarkCode.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
+
+#include "BenchmarkResult.h"
+#include "llvm/MC/MCInst.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace exegesis {
+
+// A collection of instructions that are to be assembled, executed and measured.
+struct BenchmarkCode {
+  InstructionBenchmarkKey Key;
+
+  // We also need to provide the registers that are live on entry for the
+  // assembler to generate proper prologue/epilogue.
+  std::vector<unsigned> LiveIns;
+
+  // Information about how this configuration was built.
+  std::string Info;
+};
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.cpp
new file mode 100644
index 0000000000..dbf07699bb
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -0,0 +1,433 @@
+//===-- BenchmarkResult.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BenchmarkResult.h" +#include "BenchmarkRunner.h" +#include "Error.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/bit.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +static constexpr const char kIntegerPrefix[] = "i_0x"; +static constexpr const char kDoublePrefix[] = "f_"; +static constexpr const char kInvalidOperand[] = "INVALID"; +static constexpr llvm::StringLiteral kNoRegister("%noreg"); + +namespace llvm { + +namespace { + +// A mutable struct holding an LLVMState that can be passed through the +// serialization process to encode/decode registers and instructions. +struct YamlContext { + YamlContext(const exegesis::LLVMState &State) + : State(&State), ErrorStream(LastError), + OpcodeNameToOpcodeIdx( + generateOpcodeNameToOpcodeIdxMapping(State.getInstrInfo())), + RegNameToRegNo(generateRegNameToRegNoMapping(State.getRegInfo())) {} + + static StringMap<unsigned> + generateOpcodeNameToOpcodeIdxMapping(const MCInstrInfo &InstrInfo) { + StringMap<unsigned> Map(InstrInfo.getNumOpcodes()); + for (unsigned I = 0, E = InstrInfo.getNumOpcodes(); I < E; ++I) + Map[InstrInfo.getName(I)] = I; + assert(Map.size() == InstrInfo.getNumOpcodes() && "Size prediction failed"); + return Map; + }; + + StringMap<unsigned> + generateRegNameToRegNoMapping(const MCRegisterInfo &RegInfo) { + StringMap<unsigned> Map(RegInfo.getNumRegs()); + // Special-case RegNo 0, which would otherwise be spelled as ''. 
+ Map[kNoRegister] = 0; + for (unsigned I = 1, E = RegInfo.getNumRegs(); I < E; ++I) + Map[RegInfo.getName(I)] = I; + assert(Map.size() == RegInfo.getNumRegs() && "Size prediction failed"); + return Map; + }; + + void serializeMCInst(const MCInst &MCInst, raw_ostream &OS) { + OS << getInstrName(MCInst.getOpcode()); + for (const auto &Op : MCInst) { + OS << ' '; + serializeMCOperand(Op, OS); + } + } + + void deserializeMCInst(StringRef String, MCInst &Value) { + SmallVector<StringRef, 16> Pieces; + String.split(Pieces, " ", /* MaxSplit */ -1, /* KeepEmpty */ false); + if (Pieces.empty()) { + ErrorStream << "Unknown Instruction: '" << String << "'\n"; + return; + } + bool ProcessOpcode = true; + for (StringRef Piece : Pieces) { + if (ProcessOpcode) + Value.setOpcode(getInstrOpcode(Piece)); + else + Value.addOperand(deserializeMCOperand(Piece)); + ProcessOpcode = false; + } + } + + std::string &getLastError() { return ErrorStream.str(); } + + raw_string_ostream &getErrorStream() { return ErrorStream; } + + StringRef getRegName(unsigned RegNo) { + // Special case: RegNo 0 is NoRegister. We have to deal with it explicitly. 
+ if (RegNo == 0) + return kNoRegister; + const StringRef RegName = State->getRegInfo().getName(RegNo); + if (RegName.empty()) + ErrorStream << "No register with enum value '" << RegNo << "'\n"; + return RegName; + } + + Optional<unsigned> getRegNo(StringRef RegName) { + auto Iter = RegNameToRegNo.find(RegName); + if (Iter != RegNameToRegNo.end()) + return Iter->second; + ErrorStream << "No register with name '" << RegName << "'\n"; + return None; + } + +private: + void serializeIntegerOperand(raw_ostream &OS, int64_t Value) { + OS << kIntegerPrefix; + OS.write_hex(bit_cast<uint64_t>(Value)); + } + + bool tryDeserializeIntegerOperand(StringRef String, int64_t &Value) { + if (!String.consume_front(kIntegerPrefix)) + return false; + return !String.consumeInteger(16, Value); + } + + void serializeFPOperand(raw_ostream &OS, double Value) { + OS << kDoublePrefix << format("%la", Value); + } + + bool tryDeserializeFPOperand(StringRef String, double &Value) { + if (!String.consume_front(kDoublePrefix)) + return false; + char *EndPointer = nullptr; + Value = strtod(String.begin(), &EndPointer); + return EndPointer == String.end(); + } + + void serializeMCOperand(const MCOperand &MCOperand, raw_ostream &OS) { + if (MCOperand.isReg()) { + OS << getRegName(MCOperand.getReg()); + } else if (MCOperand.isImm()) { + serializeIntegerOperand(OS, MCOperand.getImm()); + } else if (MCOperand.isDFPImm()) { + serializeFPOperand(OS, bit_cast<double>(MCOperand.getDFPImm())); + } else { + OS << kInvalidOperand; + } + } + + MCOperand deserializeMCOperand(StringRef String) { + assert(!String.empty()); + int64_t IntValue = 0; + double DoubleValue = 0; + if (tryDeserializeIntegerOperand(String, IntValue)) + return MCOperand::createImm(IntValue); + if (tryDeserializeFPOperand(String, DoubleValue)) + return MCOperand::createDFPImm(bit_cast<uint64_t>(DoubleValue)); + if (auto RegNo = getRegNo(String)) + return MCOperand::createReg(*RegNo); + if (String != kInvalidOperand) + ErrorStream << 
"Unknown Operand: '" << String << "'\n"; + return {}; + } + + StringRef getInstrName(unsigned InstrNo) { + const StringRef InstrName = State->getInstrInfo().getName(InstrNo); + if (InstrName.empty()) + ErrorStream << "No opcode with enum value '" << InstrNo << "'\n"; + return InstrName; + } + + unsigned getInstrOpcode(StringRef InstrName) { + auto Iter = OpcodeNameToOpcodeIdx.find(InstrName); + if (Iter != OpcodeNameToOpcodeIdx.end()) + return Iter->second; + ErrorStream << "No opcode with name '" << InstrName << "'\n"; + return 0; + } + + const exegesis::LLVMState *State; + std::string LastError; + raw_string_ostream ErrorStream; + const StringMap<unsigned> OpcodeNameToOpcodeIdx; + const StringMap<unsigned> RegNameToRegNo; +}; +} // namespace + +// Defining YAML traits for IO. +namespace yaml { + +static YamlContext &getTypedContext(void *Ctx) { + return *reinterpret_cast<YamlContext *>(Ctx); +} + +// std::vector<MCInst> will be rendered as a list. +template <> struct SequenceElementTraits<MCInst> { + static const bool flow = false; +}; + +template <> struct ScalarTraits<MCInst> { + + static void output(const MCInst &Value, void *Ctx, raw_ostream &Out) { + getTypedContext(Ctx).serializeMCInst(Value, Out); + } + + static StringRef input(StringRef Scalar, void *Ctx, MCInst &Value) { + YamlContext &Context = getTypedContext(Ctx); + Context.deserializeMCInst(Scalar, Value); + return Context.getLastError(); + } + + // By default strings are quoted only when necessary. + // We force the use of single quotes for uniformity. + static QuotingType mustQuote(StringRef) { return QuotingType::Single; } + + static const bool flow = true; +}; + +// std::vector<exegesis::Measure> will be rendered as a list. +template <> struct SequenceElementTraits<exegesis::BenchmarkMeasure> { + static const bool flow = false; +}; + +// exegesis::Measure is rendererd as a flow instead of a list. +// e.g. 
{ "key": "the key", "value": 0123 } +template <> struct MappingTraits<exegesis::BenchmarkMeasure> { + static void mapping(IO &Io, exegesis::BenchmarkMeasure &Obj) { + Io.mapRequired("key", Obj.Key); + if (!Io.outputting()) { + // For backward compatibility, interpret debug_string as a key. + Io.mapOptional("debug_string", Obj.Key); + } + Io.mapRequired("value", Obj.PerInstructionValue); + Io.mapOptional("per_snippet_value", Obj.PerSnippetValue); + } + static const bool flow = true; +}; + +template <> +struct ScalarEnumerationTraits<exegesis::InstructionBenchmark::ModeE> { + static void enumeration(IO &Io, + exegesis::InstructionBenchmark::ModeE &Value) { + Io.enumCase(Value, "", exegesis::InstructionBenchmark::Unknown); + Io.enumCase(Value, "latency", exegesis::InstructionBenchmark::Latency); + Io.enumCase(Value, "uops", exegesis::InstructionBenchmark::Uops); + Io.enumCase(Value, "inverse_throughput", + exegesis::InstructionBenchmark::InverseThroughput); + } +}; + +// std::vector<exegesis::RegisterValue> will be rendered as a list. 
+template <> struct SequenceElementTraits<exegesis::RegisterValue> { + static const bool flow = false; +}; + +template <> struct ScalarTraits<exegesis::RegisterValue> { + static constexpr const unsigned kRadix = 16; + static constexpr const bool kSigned = false; + + static void output(const exegesis::RegisterValue &RV, void *Ctx, + raw_ostream &Out) { + YamlContext &Context = getTypedContext(Ctx); + Out << Context.getRegName(RV.Register) << "=0x" + << toString(RV.Value, kRadix, kSigned); + } + + static StringRef input(StringRef String, void *Ctx, + exegesis::RegisterValue &RV) { + SmallVector<StringRef, 2> Pieces; + String.split(Pieces, "=0x", /* MaxSplit */ -1, + /* KeepEmpty */ false); + YamlContext &Context = getTypedContext(Ctx); + Optional<unsigned> RegNo; + if (Pieces.size() == 2 && (RegNo = Context.getRegNo(Pieces[0]))) { + RV.Register = *RegNo; + const unsigned BitsNeeded = APInt::getBitsNeeded(Pieces[1], kRadix); + RV.Value = APInt(BitsNeeded, Pieces[1], kRadix); + } else { + Context.getErrorStream() + << "Unknown initial register value: '" << String << "'"; + } + return Context.getLastError(); + } + + static QuotingType mustQuote(StringRef) { return QuotingType::Single; } + + static const bool flow = true; +}; + +template <> +struct MappingContextTraits<exegesis::InstructionBenchmarkKey, YamlContext> { + static void mapping(IO &Io, exegesis::InstructionBenchmarkKey &Obj, + YamlContext &Context) { + Io.setContext(&Context); + Io.mapRequired("instructions", Obj.Instructions); + Io.mapOptional("config", Obj.Config); + Io.mapRequired("register_initial_values", Obj.RegisterInitialValues); + } +}; + +template <> +struct MappingContextTraits<exegesis::InstructionBenchmark, YamlContext> { + struct NormalizedBinary { + NormalizedBinary(IO &io) {} + NormalizedBinary(IO &, std::vector<uint8_t> &Data) : Binary(Data) {} + std::vector<uint8_t> denormalize(IO &) { + std::vector<uint8_t> Data; + std::string Str; + raw_string_ostream OSS(Str); + Binary.writeAsBinary(OSS); 
+ OSS.flush(); + Data.assign(Str.begin(), Str.end()); + return Data; + } + + BinaryRef Binary; + }; + + static void mapping(IO &Io, exegesis::InstructionBenchmark &Obj, + YamlContext &Context) { + Io.mapRequired("mode", Obj.Mode); + Io.mapRequired("key", Obj.Key, Context); + Io.mapRequired("cpu_name", Obj.CpuName); + Io.mapRequired("llvm_triple", Obj.LLVMTriple); + Io.mapRequired("num_repetitions", Obj.NumRepetitions); + Io.mapRequired("measurements", Obj.Measurements); + Io.mapRequired("error", Obj.Error); + Io.mapOptional("info", Obj.Info); + // AssembledSnippet + MappingNormalization<NormalizedBinary, std::vector<uint8_t>> BinaryString( + Io, Obj.AssembledSnippet); + Io.mapOptional("assembled_snippet", BinaryString->Binary); + } +}; + +} // namespace yaml + +namespace exegesis { + +Expected<InstructionBenchmark> +InstructionBenchmark::readYaml(const LLVMState &State, StringRef Filename) { + if (auto ExpectedMemoryBuffer = + errorOrToExpected(MemoryBuffer::getFile(Filename, /*IsText=*/true))) { + yaml::Input Yin(*ExpectedMemoryBuffer.get()); + YamlContext Context(State); + InstructionBenchmark Benchmark; + if (Yin.setCurrentDocument()) + yaml::yamlize(Yin, Benchmark, /*unused*/ true, Context); + if (!Context.getLastError().empty()) + return make_error<Failure>(Context.getLastError()); + return Benchmark; + } else { + return ExpectedMemoryBuffer.takeError(); + } +} + +Expected<std::vector<InstructionBenchmark>> +InstructionBenchmark::readYamls(const LLVMState &State, StringRef Filename) { + if (auto ExpectedMemoryBuffer = + errorOrToExpected(MemoryBuffer::getFile(Filename, /*IsText=*/true))) { + yaml::Input Yin(*ExpectedMemoryBuffer.get()); + YamlContext Context(State); + std::vector<InstructionBenchmark> Benchmarks; + while (Yin.setCurrentDocument()) { + Benchmarks.emplace_back(); + yamlize(Yin, Benchmarks.back(), /*unused*/ true, Context); + if (Yin.error()) + return errorCodeToError(Yin.error()); + if (!Context.getLastError().empty()) + return 
make_error<Failure>(Context.getLastError()); + Yin.nextDocument(); + } + return Benchmarks; + } else { + return ExpectedMemoryBuffer.takeError(); + } +} + +Error InstructionBenchmark::writeYamlTo(const LLVMState &State, + raw_ostream &OS) { + auto Cleanup = make_scope_exit([&] { OS.flush(); }); + yaml::Output Yout(OS, nullptr /*Ctx*/, 200 /*WrapColumn*/); + YamlContext Context(State); + Yout.beginDocuments(); + yaml::yamlize(Yout, *this, /*unused*/ true, Context); + if (!Context.getLastError().empty()) + return make_error<Failure>(Context.getLastError()); + Yout.endDocuments(); + return Error::success(); +} + +Error InstructionBenchmark::readYamlFrom(const LLVMState &State, + StringRef InputContent) { + yaml::Input Yin(InputContent); + YamlContext Context(State); + if (Yin.setCurrentDocument()) + yaml::yamlize(Yin, *this, /*unused*/ true, Context); + if (!Context.getLastError().empty()) + return make_error<Failure>(Context.getLastError()); + return Error::success(); +} + +Error InstructionBenchmark::writeYaml(const LLVMState &State, + const StringRef Filename) { + if (Filename == "-") { + if (auto Err = writeYamlTo(State, outs())) + return Err; + } else { + int ResultFD = 0; + if (auto E = errorCodeToError(openFileForWrite(Filename, ResultFD, + sys::fs::CD_CreateAlways, + sys::fs::OF_TextWithCRLF))) { + return E; + } + raw_fd_ostream Ostr(ResultFD, true /*shouldClose*/); + if (auto Err = writeYamlTo(State, Ostr)) + return Err; + } + return Error::success(); +} + +void PerInstructionStats::push(const BenchmarkMeasure &BM) { + if (Key.empty()) + Key = BM.Key; + assert(Key == BM.Key); + ++NumValues; + SumValues += BM.PerInstructionValue; + MaxValue = std::max(MaxValue, BM.PerInstructionValue); + MinValue = std::min(MinValue, BM.PerInstructionValue); +} + +bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B) { + return std::tie(A.Key, A.PerInstructionValue, A.PerSnippetValue) == + std::tie(B.Key, B.PerInstructionValue, B.PerSnippetValue); +} + + +} // 
namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.h new file mode 100644 index 0000000000..436bd00ac4 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -0,0 +1,124 @@ +//===-- BenchmarkResult.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines classes to represent measurements and serialize/deserialize them to +// Yaml. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H + +#include "LlvmState.h" +#include "RegisterValue.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/YAMLTraits.h" +#include <limits> +#include <string> +#include <unordered_map> +#include <vector> + +namespace llvm { +class Error; + +namespace exegesis { + +struct InstructionBenchmarkKey { + // The LLVM opcode name. + std::vector<MCInst> Instructions; + // The initial values of the registers. + std::vector<RegisterValue> RegisterInitialValues; + // An opaque configuration, that can be used to separate several benchmarks of + // the same instruction under different configurations. + std::string Config; +}; + +struct BenchmarkMeasure { + // A helper to create an unscaled BenchmarkMeasure. + static BenchmarkMeasure Create(std::string Key, double Value) { + return {Key, Value, Value}; + } + std::string Key; + // This is the per-instruction value, i.e. 
measured quantity scaled per + // instruction. + double PerInstructionValue; + // This is the per-snippet value, i.e. measured quantity for one repetition of + // the whole snippet. + double PerSnippetValue; +}; + +// The result of an instruction benchmark. +struct InstructionBenchmark { + InstructionBenchmarkKey Key; + enum ModeE { Unknown, Latency, Uops, InverseThroughput }; + ModeE Mode; + std::string CpuName; + std::string LLVMTriple; + // Which instruction is being benchmarked here? + const MCInst &keyInstruction() const { return Key.Instructions[0]; } + // The number of instructions inside the repeated snippet. For example, if a + // snippet of 3 instructions is repeated 4 times, this is 12. + unsigned NumRepetitions = 0; + enum RepetitionModeE { Duplicate, Loop, AggregateMin }; + // Note that measurements are per instruction. + std::vector<BenchmarkMeasure> Measurements; + std::string Error; + std::string Info; + std::vector<uint8_t> AssembledSnippet; + // How to aggregate measurements. + enum ResultAggregationModeE { Min, Max, Mean, MinVariance }; + // Read functions. + static Expected<InstructionBenchmark> readYaml(const LLVMState &State, + StringRef Filename); + + static Expected<std::vector<InstructionBenchmark>> + readYamls(const LLVMState &State, StringRef Filename); + + class Error readYamlFrom(const LLVMState &State, StringRef InputContent); + + // Write functions, non-const because of YAML traits. + class Error writeYamlTo(const LLVMState &State, raw_ostream &S); + + class Error writeYaml(const LLVMState &State, const StringRef Filename); +}; + +bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B); + +//------------------------------------------------------------------------------ +// Utilities to work with Benchmark measures. + +// A class that measures stats over benchmark measures. 
+class PerInstructionStats { +public: + void push(const BenchmarkMeasure &BM); + + double avg() const { + assert(NumValues); + return SumValues / NumValues; + } + double min() const { return MinValue; } + double max() const { return MaxValue; } + + const std::string &key() const { return Key; } + +private: + std::string Key; + double SumValues = 0.0; + int NumValues = 0; + double MaxValue = std::numeric_limits<double>::min(); + double MinValue = std::numeric_limits<double>::max(); +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.cpp new file mode 100644 index 0000000000..03e7ccc26f --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -0,0 +1,281 @@ +//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <array> +#include <memory> +#include <string> + +#include "Assembler.h" +#include "BenchmarkRunner.h" +#include "Error.h" +#include "MCInstrDescView.h" +#include "PerfHelper.h" +#include "Target.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Program.h" + +namespace llvm { +namespace exegesis { + +BenchmarkRunner::BenchmarkRunner(const LLVMState &State, + InstructionBenchmark::ModeE Mode) + : State(State), Mode(Mode), Scratch(std::make_unique<ScratchSpace>()) {} + +BenchmarkRunner::~BenchmarkRunner() = default; + +namespace { +class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { +public: + FunctionExecutorImpl(const LLVMState &State, + object::OwningBinary<object::ObjectFile> Obj, + BenchmarkRunner::ScratchSpace *Scratch) + : State(State), Function(State.createTargetMachine(), std::move(Obj)), + Scratch(Scratch) {} + +private: + Expected<int64_t> runAndMeasure(const char *Counters) const override { + auto ResultOrError = runAndSample(Counters); + if (ResultOrError) + return ResultOrError.get()[0]; + return ResultOrError.takeError(); + } + + static void + accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues, + llvm::SmallVector<int64_t, 4> *Result) { + const size_t NumValues = std::max(NewValues.size(), Result->size()); + if (NumValues > Result->size()) + Result->resize(NumValues, 0); + for (size_t I = 0, End = NewValues.size(); I < End; ++I) + (*Result)[I] += NewValues[I]; + } + + Expected<llvm::SmallVector<int64_t, 4>> + runAndSample(const char *Counters) const override { + // We sum counts when there are 
several counters for a single ProcRes + // (e.g. P23 on SandyBridge). + llvm::SmallVector<int64_t, 4> CounterValues; + int Reserved = 0; + SmallVector<StringRef, 2> CounterNames; + StringRef(Counters).split(CounterNames, '+'); + char *const ScratchPtr = Scratch->ptr(); + const ExegesisTarget &ET = State.getExegesisTarget(); + for (auto &CounterName : CounterNames) { + CounterName = CounterName.trim(); + auto CounterOrError = ET.createCounter(CounterName, State); + + if (!CounterOrError) + return CounterOrError.takeError(); + + pfm::Counter *Counter = CounterOrError.get().get(); + if (Reserved == 0) { + Reserved = Counter->numValues(); + CounterValues.reserve(Reserved); + } else if (Reserved != Counter->numValues()) + // It'd be wrong to accumulate vectors of different sizes. + return make_error<Failure>( + llvm::Twine("Inconsistent number of values for counter ") + .concat(CounterName) + .concat(std::to_string(Counter->numValues())) + .concat(" vs expected of ") + .concat(std::to_string(Reserved))); + Scratch->clear(); + { + auto PS = ET.withSavedState(); + CrashRecoveryContext CRC; + CrashRecoveryContext::Enable(); + const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { + Counter->start(); + this->Function(ScratchPtr); + Counter->stop(); + }); + CrashRecoveryContext::Disable(); + PS.reset(); + if (Crashed) { + std::string Msg = "snippet crashed while running"; +#ifdef LLVM_ON_UNIX + // See "Exit Status for Commands": + // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html + constexpr const int kSigOffset = 128; + if (const char *const SigName = strsignal(CRC.RetCode - kSigOffset)) { + Msg += ": "; + Msg += SigName; + } +#endif + return make_error<SnippetCrash>(std::move(Msg)); + } + } + + auto ValueOrError = Counter->readOrError(Function.getFunctionBytes()); + if (!ValueOrError) + return ValueOrError.takeError(); + accumulateCounterValues(ValueOrError.get(), &CounterValues); + } + return CounterValues; + } + + const LLVMState 
&State; + const ExecutableFunction Function; + BenchmarkRunner::ScratchSpace *const Scratch; +}; +} // namespace + +Expected<InstructionBenchmark> BenchmarkRunner::runConfiguration( + const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize, + ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors, + bool DumpObjectToDisk) const { + InstructionBenchmark InstrBenchmark; + InstrBenchmark.Mode = Mode; + InstrBenchmark.CpuName = std::string(State.getTargetMachine().getTargetCPU()); + InstrBenchmark.LLVMTriple = + State.getTargetMachine().getTargetTriple().normalize(); + InstrBenchmark.NumRepetitions = NumRepetitions; + InstrBenchmark.Info = BC.Info; + + const std::vector<MCInst> &Instructions = BC.Key.Instructions; + + InstrBenchmark.Key = BC.Key; + + // If we end up having an error, and we've previously succeeded with + // some other Repetitor, we want to discard the previous measurements. + struct ClearBenchmarkOnReturn { + ClearBenchmarkOnReturn(InstructionBenchmark *IB) : IB(IB) {} + ~ClearBenchmarkOnReturn() { + if (Clear) + IB->Measurements.clear(); + } + void disarm() { Clear = false; } + + private: + InstructionBenchmark *const IB; + bool Clear = true; + }; + ClearBenchmarkOnReturn CBOR(&InstrBenchmark); + + for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors) { + // Assemble at least kMinInstructionsForSnippet instructions by repeating + // the snippet for debug/analysis. This is so that the user clearly + // understands that the inside instructions are repeated. 
+ const int MinInstructionsForSnippet = 4 * Instructions.size(); + const int LoopBodySizeForSnippet = 2 * Instructions.size(); + { + SmallString<0> Buffer; + raw_svector_ostream OS(Buffer); + if (Error E = assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, + Repetitor->Repeat(Instructions, MinInstructionsForSnippet, + LoopBodySizeForSnippet), + OS)) { + return std::move(E); + } + const ExecutableFunction EF(State.createTargetMachine(), + getObjectFromBuffer(OS.str())); + const auto FnBytes = EF.getFunctionBytes(); + llvm::append_range(InstrBenchmark.AssembledSnippet, FnBytes); + } + + // Assemble NumRepetitions instructions repetitions of the snippet for + // measurements. + const auto Filler = Repetitor->Repeat( + Instructions, InstrBenchmark.NumRepetitions, LoopBodySize); + + object::OwningBinary<object::ObjectFile> ObjectFile; + if (DumpObjectToDisk) { + auto ObjectFilePath = writeObjectFile(BC, Filler); + if (Error E = ObjectFilePath.takeError()) { + InstrBenchmark.Error = toString(std::move(E)); + return InstrBenchmark; + } + outs() << "Check generated assembly with: /usr/bin/objdump -d " + << *ObjectFilePath << "\n"; + ObjectFile = getObjectFromFile(*ObjectFilePath); + } else { + SmallString<0> Buffer; + raw_svector_ostream OS(Buffer); + if (Error E = assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, Filler, OS)) { + return std::move(E); + } + ObjectFile = getObjectFromBuffer(OS.str()); + } + + const FunctionExecutorImpl Executor(State, std::move(ObjectFile), + Scratch.get()); + auto NewMeasurements = runMeasurements(Executor); + if (Error E = NewMeasurements.takeError()) { + if (!E.isA<SnippetCrash>()) + return std::move(E); + InstrBenchmark.Error = toString(std::move(E)); + return InstrBenchmark; + } + assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions"); + for (BenchmarkMeasure &BM : *NewMeasurements) { 
+ // Scale the measurements by instruction. + BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; + // Scale the measurements by snippet. + BM.PerSnippetValue *= static_cast<double>(Instructions.size()) / + InstrBenchmark.NumRepetitions; + } + if (InstrBenchmark.Measurements.empty()) { + InstrBenchmark.Measurements = std::move(*NewMeasurements); + continue; + } + + assert(Repetitors.size() > 1 && !InstrBenchmark.Measurements.empty() && + "We're in an 'min' repetition mode, and need to aggregate new " + "result to the existing result."); + assert(InstrBenchmark.Measurements.size() == NewMeasurements->size() && + "Expected to have identical number of measurements."); + for (auto I : zip(InstrBenchmark.Measurements, *NewMeasurements)) { + BenchmarkMeasure &Measurement = std::get<0>(I); + BenchmarkMeasure &NewMeasurement = std::get<1>(I); + assert(Measurement.Key == NewMeasurement.Key && + "Expected measurements to be symmetric"); + + Measurement.PerInstructionValue = std::min( + Measurement.PerInstructionValue, NewMeasurement.PerInstructionValue); + Measurement.PerSnippetValue = + std::min(Measurement.PerSnippetValue, NewMeasurement.PerSnippetValue); + } + } + + // We successfully measured everything, so don't discard the results. 
+ CBOR.disarm(); + return InstrBenchmark; +} + +Expected<std::string> +BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC, + const FillFunction &FillFunction) const { + int ResultFD = 0; + SmallString<256> ResultPath; + if (Error E = errorCodeToError( + sys::fs::createTemporaryFile("snippet", "o", ResultFD, ResultPath))) + return std::move(E); + raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); + if (Error E = assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, + BC.Key.RegisterInitialValues, FillFunction, OFS)) { + return std::move(E); + } + return std::string(ResultPath.str()); +} + +BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.h new file mode 100644 index 0000000000..b66902e6c0 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -0,0 +1,94 @@ +//===-- BenchmarkRunner.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the abstract BenchmarkRunner class for measuring a certain execution +/// property of instructions (e.g. latency). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H + +#include "Assembler.h" +#include "BenchmarkCode.h" +#include "BenchmarkResult.h" +#include "LlvmState.h" +#include "MCInstrDescView.h" +#include "SnippetRepetitor.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Error.h" +#include <cstdlib> +#include <memory> +#include <vector> + +namespace llvm { +namespace exegesis { + +// Common code for all benchmark modes. +class BenchmarkRunner { +public: + explicit BenchmarkRunner(const LLVMState &State, + InstructionBenchmark::ModeE Mode); + + virtual ~BenchmarkRunner(); + + Expected<InstructionBenchmark> + runConfiguration(const BenchmarkCode &Configuration, unsigned NumRepetitions, + unsigned LoopUnrollFactor, + ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors, + bool DumpObjectToDisk) const; + + // Scratch space to run instructions that touch memory. + struct ScratchSpace { + static constexpr const size_t kAlignment = 1024; + static constexpr const size_t kSize = 1 << 20; // 1MB. + ScratchSpace() + : UnalignedPtr(std::make_unique<char[]>(kSize + kAlignment)), + AlignedPtr( + UnalignedPtr.get() + kAlignment - + (reinterpret_cast<intptr_t>(UnalignedPtr.get()) % kAlignment)) {} + char *ptr() const { return AlignedPtr; } + void clear() { std::memset(ptr(), 0, kSize); } + + private: + const std::unique_ptr<char[]> UnalignedPtr; + char *const AlignedPtr; + }; + + // A helper to measure counters while executing a function in a sandboxed + // context. + class FunctionExecutor { + public: + virtual ~FunctionExecutor(); + // FIXME deprecate this. 
+ virtual Expected<int64_t> runAndMeasure(const char *Counters) const = 0; + + virtual Expected<llvm::SmallVector<int64_t, 4>> + runAndSample(const char *Counters) const = 0; + }; + +protected: + const LLVMState &State; + const InstructionBenchmark::ModeE Mode; + +private: + virtual Expected<std::vector<BenchmarkMeasure>> + runMeasurements(const FunctionExecutor &Executor) const = 0; + + Expected<std::string> writeObjectFile(const BenchmarkCode &Configuration, + const FillFunction &Fill) const; + + const std::unique_ptr<ScratchSpace> Scratch; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRUNNER_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.cpp new file mode 100644 index 0000000000..08646aac52 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.cpp @@ -0,0 +1,408 @@ +//===-- Clustering.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Clustering.h" +#include "Error.h" +#include "SchedClassResolution.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include <algorithm> +#include <deque> +#include <string> +#include <vector> + +namespace llvm { +namespace exegesis { + +// The clustering problem has the following characteristics: +// (A) - Low dimension (dimensions are typically proc resource units, +// typically < 10). +// (B) - Number of points : ~thousands (points are measurements of an MCInst) +// (C) - Number of clusters: ~tens. +// (D) - The number of clusters is not known /a priory/. 
+// (E) - The amount of noise is relatively small. +// The problem is rather small. In terms of algorithms, (D) disqualifies +// k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable. +// +// We've used DBSCAN here because it's simple to implement. This is a pretty +// straightforward and inefficient implementation of the pseudocode in [2]. +// +// [1] https://en.wikipedia.org/wiki/DBSCAN +// [2] https://en.wikipedia.org/wiki/OPTICS_algorithm + +// Finds the points at distance less than sqrt(EpsilonSquared) of Q (not +// including Q). +void InstructionBenchmarkClustering::rangeQuery( + const size_t Q, std::vector<size_t> &Neighbors) const { + Neighbors.clear(); + Neighbors.reserve(Points_.size() - 1); // The Q itself isn't a neighbor. + const auto &QMeasurements = Points_[Q].Measurements; + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + if (P == Q) + continue; + const auto &PMeasurements = Points_[P].Measurements; + if (PMeasurements.empty()) // Error point. + continue; + if (isNeighbour(PMeasurements, QMeasurements, + AnalysisClusteringEpsilonSquared_)) { + Neighbors.push_back(P); + } + } +} + +// Given a set of points, checks that all the points are neighbours +// up to AnalysisClusteringEpsilon. This is O(2*N). +bool InstructionBenchmarkClustering::areAllNeighbours( + ArrayRef<size_t> Pts) const { + // First, get the centroid of this group of points. This is O(N). + SchedClassClusterCentroid G; + for_each(Pts, [this, &G](size_t P) { + assert(P < Points_.size()); + ArrayRef<BenchmarkMeasure> Measurements = Points_[P].Measurements; + if (Measurements.empty()) // Error point. + return; + G.addPoint(Measurements); + }); + const std::vector<BenchmarkMeasure> Centroid = G.getAsPoint(); + + // Since we will be comparing with the centroid, we need to halve the epsilon. 
+ double AnalysisClusteringEpsilonHalvedSquared = + AnalysisClusteringEpsilonSquared_ / 4.0; + + // And now check that every point is a neighbour of the centroid. Also O(N). + return all_of( + Pts, [this, &Centroid, AnalysisClusteringEpsilonHalvedSquared](size_t P) { + assert(P < Points_.size()); + const auto &PMeasurements = Points_[P].Measurements; + if (PMeasurements.empty()) // Error point. + return true; // Pretend that error point is a neighbour. + return isNeighbour(PMeasurements, Centroid, + AnalysisClusteringEpsilonHalvedSquared); + }); +} + +InstructionBenchmarkClustering::InstructionBenchmarkClustering( + const std::vector<InstructionBenchmark> &Points, + const double AnalysisClusteringEpsilonSquared) + : Points_(Points), + AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared), + NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {} + +Error InstructionBenchmarkClustering::validateAndSetup() { + ClusterIdForPoint_.resize(Points_.size()); + // Mark erroneous measurements out. + // All points must have the same number of dimensions, in the same order. 
+ const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr; + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + const auto &Point = Points_[P]; + if (!Point.Error.empty()) { + ClusterIdForPoint_[P] = ClusterId::error(); + ErrorCluster_.PointIndices.push_back(P); + continue; + } + const auto *CurMeasurement = &Point.Measurements; + if (LastMeasurement) { + if (LastMeasurement->size() != CurMeasurement->size()) { + return make_error<ClusteringError>( + "inconsistent measurement dimensions"); + } + for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) { + if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) { + return make_error<ClusteringError>( + "inconsistent measurement dimensions keys"); + } + } + } + LastMeasurement = CurMeasurement; + } + if (LastMeasurement) { + NumDimensions_ = LastMeasurement->size(); + } + return Error::success(); +} + +void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) { + std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs. + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + if (!ClusterIdForPoint_[P].isUndef()) + continue; // Previously processed in inner loop. + rangeQuery(P, Neighbors); + if (Neighbors.size() + 1 < MinPts) { // Density check. + // The region around P is not dense enough to create a new cluster, mark + // as noise for now. + ClusterIdForPoint_[P] = ClusterId::noise(); + continue; + } + + // Create a new cluster, add P. + Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size())); + Cluster &CurrentCluster = Clusters_.back(); + ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */ + CurrentCluster.PointIndices.push_back(P); + + // Process P's neighbors. + SetVector<size_t, std::deque<size_t>> ToProcess; + ToProcess.insert(Neighbors.begin(), Neighbors.end()); + while (!ToProcess.empty()) { + // Retrieve a point from the set. 
+ const size_t Q = *ToProcess.begin(); + ToProcess.erase(ToProcess.begin()); + + if (ClusterIdForPoint_[Q].isNoise()) { + // Change noise point to border point. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); + continue; + } + if (!ClusterIdForPoint_[Q].isUndef()) { + continue; // Previously processed. + } + // Add Q to the current custer. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); + // And extend to the neighbors of Q if the region is dense enough. + rangeQuery(Q, Neighbors); + if (Neighbors.size() + 1 >= MinPts) { + ToProcess.insert(Neighbors.begin(), Neighbors.end()); + } + } + } + // assert(Neighbors.capacity() == (Points_.size() - 1)); + // ^ True, but it is not quaranteed to be true in all the cases. + + // Add noisy points to noise cluster. + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + if (ClusterIdForPoint_[P].isNoise()) { + NoiseCluster_.PointIndices.push_back(P); + } + } +} + +void InstructionBenchmarkClustering::clusterizeNaive( + const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo) { + // Given an instruction Opcode, which sched class id's are represented, + // and which are the benchmarks for each sched class? 
+ std::vector<SmallMapVector<unsigned, SmallVector<size_t, 1>, 1>> + OpcodeToSchedClassesToPoints; + const unsigned NumOpcodes = InstrInfo.getNumOpcodes(); + OpcodeToSchedClassesToPoints.resize(NumOpcodes); + size_t NumClusters = 0; + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + const InstructionBenchmark &Point = Points_[P]; + const MCInst &MCI = Point.keyInstruction(); + unsigned SchedClassId; + std::tie(SchedClassId, std::ignore) = + ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, MCI); + const unsigned Opcode = MCI.getOpcode(); + assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)"); + auto &Points = OpcodeToSchedClassesToPoints[Opcode][SchedClassId]; + if (Points.empty()) // If we previously have not seen any points of + ++NumClusters; // this opcode's sched class, then new cluster begins. + Points.emplace_back(P); + } + assert(NumClusters <= NumOpcodes && + "can't see more opcodes than there are total opcodes"); + assert(NumClusters <= Points_.size() && + "can't see more opcodes than there are total points"); + + Clusters_.reserve(NumClusters); // We already know how many clusters there is. + for (const auto &SchedClassesOfOpcode : OpcodeToSchedClassesToPoints) { + if (SchedClassesOfOpcode.empty()) + continue; + for (ArrayRef<size_t> PointsOfSchedClass : + make_second_range(SchedClassesOfOpcode)) { + if (PointsOfSchedClass.empty()) + continue; + // Create a new cluster. + Clusters_.emplace_back(ClusterId::makeValid( + Clusters_.size(), + /*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass))); + Cluster &CurrentCluster = Clusters_.back(); + // Mark points as belonging to the new cluster. + for_each(PointsOfSchedClass, [this, &CurrentCluster](size_t P) { + ClusterIdForPoint_[P] = CurrentCluster.Id; + }); + // And add all the points of this opcode's sched class to the new cluster. 
+ CurrentCluster.PointIndices.reserve(PointsOfSchedClass.size()); + CurrentCluster.PointIndices.assign(PointsOfSchedClass.begin(), + PointsOfSchedClass.end()); + assert(CurrentCluster.PointIndices.size() == PointsOfSchedClass.size()); + } + } + assert(Clusters_.size() == NumClusters); +} + +// Given an instruction Opcode, we can make benchmarks (measurements) of the +// instruction characteristics/performance. Then, to facilitate further analysis +// we group the benchmarks with *similar* characteristics into clusters. +// Now, this is all not entirely deterministic. Some instructions have variable +// characteristics, depending on their arguments. And thus, if we do several +// benchmarks of the same instruction Opcode, we may end up with *different* +// performance characteristics measurements. And when we then do clustering, +// these several benchmarks of the same instruction Opcode may end up being +// clustered into *different* clusters. This is not great for further analysis. +// We shall find every opcode with benchmarks not in just one cluster, and move +// *all* the benchmarks of said Opcode into one new unstable cluster per Opcode. +void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { + // Given an instruction Opcode and Config, in which clusters do benchmarks of + // this instruction lie? Normally, they all should be in the same cluster. + struct OpcodeAndConfig { + explicit OpcodeAndConfig(const InstructionBenchmark &IB) + : Opcode(IB.keyInstruction().getOpcode()), Config(&IB.Key.Config) {} + unsigned Opcode; + const std::string *Config; + + auto Tie() const -> auto { return std::tie(Opcode, *Config); } + + bool operator<(const OpcodeAndConfig &O) const { return Tie() < O.Tie(); } + bool operator!=(const OpcodeAndConfig &O) const { return Tie() != O.Tie(); } + }; + std::map<OpcodeAndConfig, SmallSet<ClusterId, 1>> OpcodeConfigToClusterIDs; + // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures. 
+ assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch"); + for (auto Point : zip(Points_, ClusterIdForPoint_)) { + const ClusterId &ClusterIdOfPoint = std::get<1>(Point); + if (!ClusterIdOfPoint.isValid()) + continue; // Only process fully valid clusters. + const OpcodeAndConfig Key(std::get<0>(Point)); + SmallSet<ClusterId, 1> &ClusterIDsOfOpcode = OpcodeConfigToClusterIDs[Key]; + ClusterIDsOfOpcode.insert(ClusterIdOfPoint); + } + + for (const auto &OpcodeConfigToClusterID : OpcodeConfigToClusterIDs) { + const SmallSet<ClusterId, 1> &ClusterIDs = OpcodeConfigToClusterID.second; + const OpcodeAndConfig &Key = OpcodeConfigToClusterID.first; + // We only care about unstable instructions. + if (ClusterIDs.size() < 2) + continue; + + // Create a new unstable cluster, one per Opcode. + Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size())); + Cluster &UnstableCluster = Clusters_.back(); + // We will find *at least* one point in each of these clusters. + UnstableCluster.PointIndices.reserve(ClusterIDs.size()); + + // Go through every cluster which we recorded as containing benchmarks + // of this UnstableOpcode. NOTE: we only recorded valid clusters. + for (const ClusterId &CID : ClusterIDs) { + assert(CID.isValid() && + "We only recorded valid clusters, not noise/error clusters."); + Cluster &OldCluster = Clusters_[CID.getId()]; // Valid clusters storage. + // Within each cluster, go through each point, and either move it to the + // new unstable cluster, or 'keep' it. + // In this case, we'll reshuffle OldCluster.PointIndices vector + // so that all the points that are *not* for UnstableOpcode are first, + // and the rest of the points is for the UnstableOpcode. 
+ const auto it = std::stable_partition( + OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(), + [this, &Key](size_t P) { + return OpcodeAndConfig(Points_[P]) != Key; + }); + assert(std::distance(it, OldCluster.PointIndices.end()) > 0 && + "Should have found at least one bad point"); + // Mark to-be-moved points as belonging to the new cluster. + std::for_each(it, OldCluster.PointIndices.end(), + [this, &UnstableCluster](size_t P) { + ClusterIdForPoint_[P] = UnstableCluster.Id; + }); + // Actually append to-be-moved points to the new cluster. + UnstableCluster.PointIndices.insert(UnstableCluster.PointIndices.end(), + it, OldCluster.PointIndices.end()); + // And finally, remove "to-be-moved" points form the old cluster. + OldCluster.PointIndices.erase(it, OldCluster.PointIndices.end()); + // Now, the old cluster may end up being empty, but let's just keep it + // in whatever state it ended up. Purging empty clusters isn't worth it. + }; + assert(UnstableCluster.PointIndices.size() > 1 && + "New unstable cluster should end up with more than one point."); + assert(UnstableCluster.PointIndices.size() >= ClusterIDs.size() && + "New unstable cluster should end up with no less points than there " + "was clusters"); + } +} + +Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create( + const std::vector<InstructionBenchmark> &Points, const ModeE Mode, + const size_t DbscanMinPts, const double AnalysisClusteringEpsilon, + const MCSubtargetInfo *SubtargetInfo, const MCInstrInfo *InstrInfo) { + InstructionBenchmarkClustering Clustering( + Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon); + if (auto Error = Clustering.validateAndSetup()) { + return std::move(Error); + } + if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) { + return Clustering; // Nothing to cluster. 
+ } + + if (Mode == ModeE::Dbscan) { + Clustering.clusterizeDbScan(DbscanMinPts); + + if (InstrInfo) + Clustering.stabilize(InstrInfo->getNumOpcodes()); + } else /*if(Mode == ModeE::Naive)*/ { + if (!SubtargetInfo || !InstrInfo) + return make_error<Failure>("'naive' clustering mode requires " + "SubtargetInfo and InstrInfo to be present"); + Clustering.clusterizeNaive(*SubtargetInfo, *InstrInfo); + } + + return Clustering; +} + +void SchedClassClusterCentroid::addPoint(ArrayRef<BenchmarkMeasure> Point) { + if (Representative.empty()) + Representative.resize(Point.size()); + assert(Representative.size() == Point.size() && + "All points should have identical dimensions."); + + for (auto I : zip(Representative, Point)) + std::get<0>(I).push(std::get<1>(I)); +} + +std::vector<BenchmarkMeasure> SchedClassClusterCentroid::getAsPoint() const { + std::vector<BenchmarkMeasure> ClusterCenterPoint(Representative.size()); + for (auto I : zip(ClusterCenterPoint, Representative)) + std::get<0>(I).PerInstructionValue = std::get<1>(I).avg(); + return ClusterCenterPoint; +} + +bool SchedClassClusterCentroid::validate( + InstructionBenchmark::ModeE Mode) const { + size_t NumMeasurements = Representative.size(); + switch (Mode) { + case InstructionBenchmark::Latency: + if (NumMeasurements != 1) { + errs() + << "invalid number of measurements in latency mode: expected 1, got " + << NumMeasurements << "\n"; + return false; + } + break; + case InstructionBenchmark::Uops: + // Can have many measurements. + break; + case InstructionBenchmark::InverseThroughput: + if (NumMeasurements != 1) { + errs() << "invalid number of measurements in inverse throughput " + "mode: expected 1, got " + << NumMeasurements << "\n"; + return false; + } + break; + default: + llvm_unreachable("unimplemented measurement matching mode"); + return false; + } + + return true; // All good. 
+} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h new file mode 100644 index 0000000000..a4da3af774 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Clustering.h @@ -0,0 +1,171 @@ +//===-- Clustering.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Utilities to compute benchmark result clusters. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H +#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H + +#include "BenchmarkResult.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Error.h" +#include <limits> +#include <vector> + +namespace llvm { +namespace exegesis { + +class InstructionBenchmarkClustering { +public: + enum ModeE { Dbscan, Naive }; + + // Clusters `Points` using DBSCAN with the given parameters. See the cc file + // for more explanations on the algorithm. 
+ static Expected<InstructionBenchmarkClustering> + create(const std::vector<InstructionBenchmark> &Points, ModeE Mode, + size_t DbscanMinPts, double AnalysisClusteringEpsilon, + const MCSubtargetInfo *SubtargetInfo = nullptr, + const MCInstrInfo *InstrInfo = nullptr); + + class ClusterId { + public: + static ClusterId noise() { return ClusterId(kNoise); } + static ClusterId error() { return ClusterId(kError); } + static ClusterId makeValid(size_t Id, bool IsUnstable = false) { + return ClusterId(Id, IsUnstable); + } + static ClusterId makeValidUnstable(size_t Id) { + return makeValid(Id, /*IsUnstable=*/true); + } + + ClusterId() : Id_(kUndef), IsUnstable_(false) {} + + // Compare id's, ignoring the 'unstability' bit. + bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } + bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } + + bool isValid() const { return Id_ <= kMaxValid; } + bool isUnstable() const { return IsUnstable_; } + bool isNoise() const { return Id_ == kNoise; } + bool isError() const { return Id_ == kError; } + bool isUndef() const { return Id_ == kUndef; } + + // Precondition: isValid(). + size_t getId() const { + assert(isValid()); + return Id_; + } + + private: + ClusterId(size_t Id, bool IsUnstable = false) + : Id_(Id), IsUnstable_(IsUnstable) {} + + static constexpr const size_t kMaxValid = + (std::numeric_limits<size_t>::max() >> 1) - 4; + static constexpr const size_t kNoise = kMaxValid + 1; + static constexpr const size_t kError = kMaxValid + 2; + static constexpr const size_t kUndef = kMaxValid + 3; + + size_t Id_ : (std::numeric_limits<size_t>::digits - 1); + size_t IsUnstable_ : 1; + }; + static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); + + struct Cluster { + Cluster() = delete; + explicit Cluster(const ClusterId &Id) : Id(Id) {} + + const ClusterId Id; + // Indices of benchmarks within the cluster. 
+ std::vector<int> PointIndices; + }; + + ClusterId getClusterIdForPoint(size_t P) const { + return ClusterIdForPoint_[P]; + } + + const std::vector<InstructionBenchmark> &getPoints() const { return Points_; } + + const Cluster &getCluster(ClusterId Id) const { + assert(!Id.isUndef() && "unlabeled cluster"); + if (Id.isNoise()) { + return NoiseCluster_; + } + if (Id.isError()) { + return ErrorCluster_; + } + return Clusters_[Id.getId()]; + } + + const std::vector<Cluster> &getValidClusters() const { return Clusters_; } + + // Returns true if the given point is within a distance Epsilon of each other. + bool isNeighbour(const std::vector<BenchmarkMeasure> &P, + const std::vector<BenchmarkMeasure> &Q, + const double EpsilonSquared_) const { + double DistanceSquared = 0.0; + for (size_t I = 0, E = P.size(); I < E; ++I) { + const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; + DistanceSquared += Diff * Diff; + } + return DistanceSquared <= EpsilonSquared_; + } + +private: + InstructionBenchmarkClustering( + const std::vector<InstructionBenchmark> &Points, + double AnalysisClusteringEpsilonSquared); + + Error validateAndSetup(); + + void clusterizeDbScan(size_t MinPts); + void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, + const MCInstrInfo &InstrInfo); + + // Stabilization is only needed if dbscan was used to clusterize. + void stabilize(unsigned NumOpcodes); + + void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; + + bool areAllNeighbours(ArrayRef<size_t> Pts) const; + + const std::vector<InstructionBenchmark> &Points_; + const double AnalysisClusteringEpsilonSquared_; + + int NumDimensions_ = 0; + // ClusterForPoint_[P] is the cluster id for Points[P]. 
+ std::vector<ClusterId> ClusterIdForPoint_; + std::vector<Cluster> Clusters_; + Cluster NoiseCluster_; + Cluster ErrorCluster_; +}; + +class SchedClassClusterCentroid { +public: + const std::vector<PerInstructionStats> &getStats() const { + return Representative; + } + + std::vector<BenchmarkMeasure> getAsPoint() const; + + void addPoint(ArrayRef<BenchmarkMeasure> Point); + + bool validate(InstructionBenchmark::ModeE Mode) const; + +private: + // Measurement stats for the points in the SchedClassCluster. + std::vector<PerInstructionStats> Representative; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.cpp new file mode 100644 index 0000000000..9840a08c25 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.cpp @@ -0,0 +1,115 @@ +//===-- CodeTemplate.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CodeTemplate.h" + +namespace llvm { +namespace exegesis { + +CodeTemplate::CodeTemplate(CodeTemplate &&) = default; + +CodeTemplate &CodeTemplate::operator=(CodeTemplate &&) = default; + +InstructionTemplate::InstructionTemplate(const Instruction *Instr) + : Instr(Instr), VariableValues(Instr->Variables.size()) {} + +InstructionTemplate::InstructionTemplate(InstructionTemplate &&) = default; + +InstructionTemplate &InstructionTemplate:: +operator=(InstructionTemplate &&) = default; + +InstructionTemplate::InstructionTemplate(const InstructionTemplate &) = default; + +InstructionTemplate &InstructionTemplate:: +operator=(const InstructionTemplate &) = default; + +unsigned InstructionTemplate::getOpcode() const { + return Instr->Description.getOpcode(); +} + +MCOperand &InstructionTemplate::getValueFor(const Variable &Var) { + return VariableValues[Var.getIndex()]; +} + +const MCOperand &InstructionTemplate::getValueFor(const Variable &Var) const { + return VariableValues[Var.getIndex()]; +} + +MCOperand &InstructionTemplate::getValueFor(const Operand &Op) { + return getValueFor(Instr->Variables[Op.getVariableIndex()]); +} + +const MCOperand &InstructionTemplate::getValueFor(const Operand &Op) const { + return getValueFor(Instr->Variables[Op.getVariableIndex()]); +} + +bool InstructionTemplate::hasImmediateVariables() const { + return any_of(Instr->Variables, [this](const Variable &Var) { + return Instr->getPrimaryOperand(Var).isImmediate(); + }); +} + +MCInst InstructionTemplate::build() const { + MCInst Result; + Result.setOpcode(Instr->Description.Opcode); + for (const auto &Op : Instr->Operands) + if (Op.isExplicit()) + Result.addOperand(getValueFor(Op)); + return Result; +} + +bool isEnumValue(ExecutionMode Execution) { + return isPowerOf2_32(static_cast<uint32_t>(Execution)); +} + +StringRef 
getName(ExecutionMode Bit) { + assert(isEnumValue(Bit) && "Bit must be a power of two"); + switch (Bit) { + case ExecutionMode::UNKNOWN: + return "UNKNOWN"; + case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: + return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS"; + case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: + return "ALWAYS_SERIAL_TIED_REGS_ALIAS"; + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: + return "SERIAL_VIA_MEMORY_INSTR"; + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: + return "SERIAL_VIA_EXPLICIT_REGS"; + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: + return "SERIAL_VIA_NON_MEMORY_INSTR"; + case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF: + return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF"; + case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS: + return "PARALLEL_VIA_EXPLICIT_REGS"; + } + llvm_unreachable("Missing enum case"); +} + +ArrayRef<ExecutionMode> getAllExecutionBits() { + static const ExecutionMode kAllExecutionModeBits[] = { + ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS, + ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + ExecutionMode::SERIAL_VIA_MEMORY_INSTR, + ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF, + ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS, + }; + return makeArrayRef(kAllExecutionModeBits); +} + +SmallVector<ExecutionMode, 4> getExecutionModeBits(ExecutionMode Execution) { + SmallVector<ExecutionMode, 4> Result; + for (const auto Bit : getAllExecutionBits()) + if ((Execution & Bit) == Bit) + Result.push_back(Bit); + return Result; +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.h new file mode 100644 index 0000000000..bea10304cb --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/CodeTemplate.h @@ -0,0 +1,140 @@ +//===-- CodeTemplate.h ------------------------------------------*- C++ 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A set of structures and functions to craft instructions for the +/// SnippetGenerator. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H + +#include "MCInstrDescView.h" +#include "llvm/ADT/BitmaskEnum.h" + +namespace llvm { +namespace exegesis { + +// A template for an Instruction holding values for each of its Variables. +struct InstructionTemplate { + InstructionTemplate(const Instruction *Instr); + + InstructionTemplate(const InstructionTemplate &); // default + InstructionTemplate &operator=(const InstructionTemplate &); // default + InstructionTemplate(InstructionTemplate &&); // default + InstructionTemplate &operator=(InstructionTemplate &&); // default + + unsigned getOpcode() const; + MCOperand &getValueFor(const Variable &Var); + const MCOperand &getValueFor(const Variable &Var) const; + MCOperand &getValueFor(const Operand &Op); + const MCOperand &getValueFor(const Operand &Op) const; + bool hasImmediateVariables() const; + const Instruction &getInstr() const { return *Instr; } + ArrayRef<MCOperand> getVariableValues() const { return VariableValues; } + void setVariableValues(ArrayRef<MCOperand> NewVariableValues) { + assert(VariableValues.size() == NewVariableValues.size() && + "Value count mismatch"); + VariableValues.assign(NewVariableValues.begin(), NewVariableValues.end()); + } + + // Builds an MCInst from this InstructionTemplate setting its operands + // to the corresponding variable values. Precondition: All VariableValues must + // be set. 
+ MCInst build() const; + +private: + const Instruction *Instr; + SmallVector<MCOperand, 4> VariableValues; +}; + +enum class ExecutionMode : uint8_t { + UNKNOWN = 0U, + // The instruction is always serial because implicit Use and Def alias. + // e.g. AAA (alias via EFLAGS) + ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0, + + // The instruction is always serial because one Def is tied to a Use. + // e.g. AND32ri (alias via tied GR32) + ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1, + + // The execution can be made serial by inserting a second instruction that + // clobbers/reads memory. + // e.g. MOV8rm + SERIAL_VIA_MEMORY_INSTR = 1u << 2, + + // The execution can be made serial by picking one Def that aliases with one + // Use. + // e.g. VXORPSrr XMM1, XMM1, XMM2 + SERIAL_VIA_EXPLICIT_REGS = 1u << 3, + + // The execution can be made serial by inserting a second instruction that + // uses one of the Defs and defs one of the Uses. + // e.g. + // 1st instruction: MMX_PMOVMSKBrr ECX, MM7 + // 2nd instruction: MMX_MOVD64rr MM7, ECX + // or instruction: MMX_MOVD64to64rr MM7, ECX + // or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1 + SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4, + + // The execution is always parallel because the instruction is missing Use or + // Def operands. + ALWAYS_PARALLEL_MISSING_USE_OR_DEF = 1u << 5, + + // The execution can be made parallel by repeating the same instruction but + // making sure that Defs of one instruction do not alias with Uses of the + // second one. + PARALLEL_VIA_EXPLICIT_REGS = 1u << 6, + + LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS) +}; + +// Returns whether Execution is one of the values defined in the enum above. +bool isEnumValue(ExecutionMode Execution); + +// Returns a human readable string for the enum. +StringRef getName(ExecutionMode Execution); + +// Returns a sequence of increasing powers of two corresponding to all the +// Execution flags. 
+ArrayRef<ExecutionMode> getAllExecutionBits(); + +// Decomposes Execution into individual set bits. +SmallVector<ExecutionMode, 4> getExecutionModeBits(ExecutionMode); + +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + +// A CodeTemplate is a set of InstructionTemplates that may not be fully +// specified (i.e. some variables are not yet set). This allows the +// SnippetGenerator to instantiate it many times with specific values to study +// their impact on instruction's performance. +struct CodeTemplate { + CodeTemplate() = default; + + CodeTemplate(CodeTemplate &&); // default + CodeTemplate &operator=(CodeTemplate &&); // default + CodeTemplate(const CodeTemplate &) = delete; + CodeTemplate &operator=(const CodeTemplate &) = delete; + + ExecutionMode Execution = ExecutionMode::UNKNOWN; + // See InstructionBenchmarkKey.::Config. + std::string Config; + // Some information about how this template has been created. + std::string Info; + // The list of the instructions for this template. + std::vector<InstructionTemplate> Instructions; + // If the template uses the provided scratch memory, the register in which + // the pointer to this memory is passed in to the function. + unsigned ScratchSpacePointerInReg = 0; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.cpp new file mode 100644 index 0000000000..51ce41bf00 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.cpp @@ -0,0 +1,31 @@ +//===-- Error.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Error.h" + +namespace llvm { +namespace exegesis { + +char ClusteringError::ID; + +void ClusteringError::log(raw_ostream &OS) const { OS << Msg; } + +std::error_code ClusteringError::convertToErrorCode() const { + return inconvertibleErrorCode(); +} + +char SnippetCrash::ID; + +void SnippetCrash::log(raw_ostream &OS) const { OS << Msg; } + +std::error_code SnippetCrash::convertToErrorCode() const { + return inconvertibleErrorCode(); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.h new file mode 100644 index 0000000000..e5fa093e6e --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Error.h @@ -0,0 +1,57 @@ +//===-- Error.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ERROR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_ERROR_H + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace exegesis { + +// A class representing failures that happened within llvm-exegesis, they are +// used to report informations to the user. +class Failure : public StringError { +public: + Failure(const Twine &S) : StringError(S, inconvertibleErrorCode()) {} +}; + +// A class representing failures that happened during clustering calculations. 
+class ClusteringError : public ErrorInfo<ClusteringError> { +public: + static char ID; + ClusteringError(const Twine &S) : Msg(S.str()) {} + + void log(raw_ostream &OS) const override; + + std::error_code convertToErrorCode() const override; + +private: + std::string Msg; +}; + +// A class representing failures that happened during snippet execution. +// Instead of terminating the program crashes are logged into the output. +class SnippetCrash : public ErrorInfo<SnippetCrash> { +public: + static char ID; + SnippetCrash(const Twine &S) : Msg(S.str()) {} + + void log(raw_ostream &OS) const override; + + std::error_code convertToErrorCode() const override; + +private: + std::string Msg; +}; + +} // namespace exegesis +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp new file mode 100644 index 0000000000..6cdefb8b06 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp @@ -0,0 +1,143 @@ +//===-- LatencyBenchmarkRunner.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LatencyBenchmarkRunner.h" + +#include "BenchmarkRunner.h" +#include "Target.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cmath> + +namespace llvm { +namespace exegesis { + +LatencyBenchmarkRunner::LatencyBenchmarkRunner( + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAgg) + : BenchmarkRunner(State, Mode) { + assert((Mode == InstructionBenchmark::Latency || + Mode == InstructionBenchmark::InverseThroughput) && + "invalid mode"); + ResultAggMode = ResultAgg; +} + +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; + +static double computeVariance(const llvm::SmallVector<int64_t, 4> &Values) { + if (Values.empty()) + return 0.0; + double Sum = std::accumulate(Values.begin(), Values.end(), 0.0); + + const double Mean = Sum / Values.size(); + double Ret = 0; + for (const auto &V : Values) { + double Delta = V - Mean; + Ret += Delta * Delta; + } + return Ret / Values.size(); +} + +static int64_t findMin(const llvm::SmallVector<int64_t, 4> &Values) { + if (Values.empty()) + return 0; + return *std::min_element(Values.begin(), Values.end()); +} + +static int64_t findMax(const llvm::SmallVector<int64_t, 4> &Values) { + if (Values.empty()) + return 0; + return *std::max_element(Values.begin(), Values.end()); +} + +static int64_t findMean(const llvm::SmallVector<int64_t, 4> &Values) { + if (Values.empty()) + return 0; + return std::accumulate(Values.begin(), Values.end(), 0.0) / + static_cast<double>(Values.size()); +} + +Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements( + const FunctionExecutor &Executor) const { + // Cycle measurements include some overhead from the kernel. 
Repeat the + // measure several times and return the aggregated value, as specified by + // ResultAggMode. + constexpr const int NumMeasurements = 30; + llvm::SmallVector<int64_t, 4> AccumulatedValues; + double MinVariance = std::numeric_limits<double>::infinity(); + const char *CounterName = State.getPfmCounters().CycleCounter; + // Values count for each run. + int ValuesCount = 0; + for (size_t I = 0; I < NumMeasurements; ++I) { + auto ExpectedCounterValues = Executor.runAndSample(CounterName); + if (!ExpectedCounterValues) + return ExpectedCounterValues.takeError(); + ValuesCount = ExpectedCounterValues.get().size(); + if (ValuesCount == 1) + AccumulatedValues.push_back(ExpectedCounterValues.get()[0]); + else { + // We'll keep the reading with lowest variance (ie., most stable) + double Variance = computeVariance(*ExpectedCounterValues); + if (MinVariance > Variance) { + AccumulatedValues = std::move(ExpectedCounterValues.get()); + MinVariance = Variance; + } + } + } + + std::string ModeName; + switch (Mode) { + case InstructionBenchmark::Latency: + ModeName = "latency"; + break; + case InstructionBenchmark::InverseThroughput: + ModeName = "inverse_throughput"; + break; + default: + break; + } + + switch (ResultAggMode) { + case InstructionBenchmark::MinVariance: { + if (ValuesCount == 1) + llvm::errs() << "Each sample only has one value. 
result-aggregation-mode " + "of min-variance is probably non-sensical\n"; + std::vector<BenchmarkMeasure> Result; + Result.reserve(AccumulatedValues.size()); + for (const int64_t Value : AccumulatedValues) + Result.push_back(BenchmarkMeasure::Create(ModeName, Value)); + return std::move(Result); + } + case InstructionBenchmark::Min: { + std::vector<BenchmarkMeasure> Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues))); + return std::move(Result); + } + case InstructionBenchmark::Max: { + std::vector<BenchmarkMeasure> Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues))); + return std::move(Result); + } + case InstructionBenchmark::Mean: { + std::vector<BenchmarkMeasure> Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues))); + return std::move(Result); + } + } + return llvm::make_error<Failure>(llvm::Twine("Unexpected benchmark mode(") + .concat(std::to_string(Mode)) + .concat(" and unexpected ResultAggMode ") + .concat(std::to_string(ResultAggMode))); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h new file mode 100644 index 0000000000..b9b9efc25d --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h @@ -0,0 +1,38 @@ +//===-- LatencyBenchmarkRunner.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure instruction latencies. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H +#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H + +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +class LatencyBenchmarkRunner : public BenchmarkRunner { +public: + LatencyBenchmarkRunner( + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAggMode); + ~LatencyBenchmarkRunner() override; + +private: + Expected<std::vector<BenchmarkMeasure>> + runMeasurements(const FunctionExecutor &Executor) const override; + + InstructionBenchmark::ResultAggregationModeE ResultAggMode; +}; +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.cpp new file mode 100644 index 0000000000..4797ceb330 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.cpp @@ -0,0 +1,82 @@ +//===-- LlvmState.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LlvmState.h" +#include "Target.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +namespace llvm { +namespace exegesis { + +LLVMState::LLVMState(const std::string &Triple, const std::string &CpuName, + const std::string &Features) { + std::string Error; + const Target *const TheTarget = TargetRegistry::lookupTarget(Triple, Error); + assert(TheTarget && "unknown target for host"); + const TargetOptions Options; + TheTargetMachine.reset( + static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine( + Triple, CpuName, Features, Options, Reloc::Model::Static))); + assert(TheTargetMachine && "unable to create target machine"); + TheExegesisTarget = ExegesisTarget::lookup(TheTargetMachine->getTargetTriple()); + if (!TheExegesisTarget) { + errs() << "no exegesis target for " << Triple << ", using default\n"; + TheExegesisTarget = &ExegesisTarget::getDefault(); + } + PfmCounters = &TheExegesisTarget->getPfmCounters(CpuName); + + BitVector ReservedRegs = getFunctionReservedRegs(getTargetMachine()); + for (const unsigned Reg : TheExegesisTarget->getUnavailableRegisters()) + ReservedRegs.set(Reg); + RATC.reset( + new RegisterAliasingTrackerCache(getRegInfo(), std::move(ReservedRegs))); + IC.reset(new InstructionsCache(getInstrInfo(), getRATC())); +} + +LLVMState::LLVMState(const std::string &CpuName) + : LLVMState(sys::getProcessTriple(), + CpuName.empty() ? 
sys::getHostCPUName().str() : CpuName, "") {} + +std::unique_ptr<LLVMTargetMachine> LLVMState::createTargetMachine() const { + return std::unique_ptr<LLVMTargetMachine>(static_cast<LLVMTargetMachine *>( + TheTargetMachine->getTarget().createTargetMachine( + TheTargetMachine->getTargetTriple().normalize(), + TheTargetMachine->getTargetCPU(), + TheTargetMachine->getTargetFeatureString(), TheTargetMachine->Options, + Reloc::Model::Static))); +} + +bool LLVMState::canAssemble(const MCInst &Inst) const { + MCContext Context(TheTargetMachine->getTargetTriple(), + TheTargetMachine->getMCAsmInfo(), + TheTargetMachine->getMCRegisterInfo(), + TheTargetMachine->getMCSubtargetInfo()); + std::unique_ptr<const MCCodeEmitter> CodeEmitter( + TheTargetMachine->getTarget().createMCCodeEmitter( + *TheTargetMachine->getMCInstrInfo(), *TheTargetMachine->getMCRegisterInfo(), + Context)); + assert(CodeEmitter && "unable to create code emitter"); + SmallVector<char, 16> Tmp; + raw_svector_ostream OS(Tmp); + SmallVector<MCFixup, 4> Fixups; + CodeEmitter->encodeInstruction(Inst, OS, Fixups, + *TheTargetMachine->getMCSubtargetInfo()); + return Tmp.size() > 0; +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.h new file mode 100644 index 0000000000..e660a9f56b --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/LlvmState.h @@ -0,0 +1,79 @@ +//===-- LlvmState.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A class to set up and access common LLVM objects. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H + +#include "MCInstrDescView.h" +#include "RegisterAliasing.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <memory> +#include <string> + +namespace llvm { +namespace exegesis { + +class ExegesisTarget; +struct PfmCountersInfo; + +// An object to initialize LLVM and prepare objects needed to run the +// measurements. +class LLVMState { +public: + // Uses the host triple. If CpuName is empty, uses the host CPU. + LLVMState(const std::string &CpuName); + + LLVMState(const std::string &Triple, + const std::string &CpuName, + const std::string &Features = ""); // For tests. + + const TargetMachine &getTargetMachine() const { return *TheTargetMachine; } + std::unique_ptr<LLVMTargetMachine> createTargetMachine() const; + + const ExegesisTarget &getExegesisTarget() const { return *TheExegesisTarget; } + + bool canAssemble(const MCInst &mc_inst) const; + + // For convenience: + const MCInstrInfo &getInstrInfo() const { + return *TheTargetMachine->getMCInstrInfo(); + } + const MCRegisterInfo &getRegInfo() const { + return *TheTargetMachine->getMCRegisterInfo(); + } + const MCSubtargetInfo &getSubtargetInfo() const { + return *TheTargetMachine->getMCSubtargetInfo(); + } + + const RegisterAliasingTrackerCache &getRATC() const { return *RATC; } + const InstructionsCache &getIC() const { return *IC; } + + const PfmCountersInfo &getPfmCounters() const { return *PfmCounters; } + +private: + const ExegesisTarget *TheExegesisTarget; + std::unique_ptr<const TargetMachine> TheTargetMachine; + std::unique_ptr<const RegisterAliasingTrackerCache> RATC; + std::unique_ptr<const InstructionsCache> IC; + const PfmCountersInfo *PfmCounters; 
+}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LLVMSTATE_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.cpp new file mode 100644 index 0000000000..049cc68b4f --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -0,0 +1,400 @@ +//===-- MCInstrDescView.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCInstrDescView.h" + +#include <iterator> +#include <map> +#include <tuple> + +#include "llvm/ADT/STLExtras.h" + +namespace llvm { +namespace exegesis { + +unsigned Variable::getIndex() const { return *Index; } + +unsigned Variable::getPrimaryOperandIndex() const { + assert(!TiedOperands.empty()); + return TiedOperands[0]; +} + +bool Variable::hasTiedOperands() const { + assert(TiedOperands.size() <= 2 && + "No more than two operands can be tied together"); + // By definition only Use and Def operands can be tied together. + // TiedOperands[0] is the Def operand (LLVM stores defs first). + // TiedOperands[1] is the Use operand. 
+ return TiedOperands.size() > 1; +} + +unsigned Operand::getIndex() const { return *Index; } + +bool Operand::isExplicit() const { return Info; } + +bool Operand::isImplicit() const { return !Info; } + +bool Operand::isImplicitReg() const { return ImplicitReg; } + +bool Operand::isDef() const { return IsDef; } + +bool Operand::isUse() const { return !IsDef; } + +bool Operand::isReg() const { return Tracker; } + +bool Operand::isTied() const { return TiedToIndex.hasValue(); } + +bool Operand::isVariable() const { return VariableIndex.hasValue(); } + +bool Operand::isMemory() const { + return isExplicit() && + getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY; +} + +bool Operand::isImmediate() const { + return isExplicit() && + getExplicitOperandInfo().OperandType == MCOI::OPERAND_IMMEDIATE; +} + +unsigned Operand::getTiedToIndex() const { return *TiedToIndex; } + +unsigned Operand::getVariableIndex() const { return *VariableIndex; } + +unsigned Operand::getImplicitReg() const { + assert(ImplicitReg); + return *ImplicitReg; +} + +const RegisterAliasingTracker &Operand::getRegisterAliasing() const { + assert(Tracker); + return *Tracker; +} + +const MCOperandInfo &Operand::getExplicitOperandInfo() const { + assert(Info); + return *Info; +} + +const BitVector *BitVectorCache::getUnique(BitVector &&BV) const { + for (const auto &Entry : Cache) + if (*Entry == BV) + return Entry.get(); + Cache.push_back(std::make_unique<BitVector>()); + auto &Entry = Cache.back(); + Entry->swap(BV); + return Entry.get(); +} + +Instruction::Instruction(const MCInstrDesc *Description, StringRef Name, + SmallVector<Operand, 8> Operands, + SmallVector<Variable, 4> Variables, + const BitVector *ImplDefRegs, + const BitVector *ImplUseRegs, + const BitVector *AllDefRegs, + const BitVector *AllUseRegs) + : Description(*Description), Name(Name), Operands(std::move(Operands)), + Variables(std::move(Variables)), ImplDefRegs(*ImplDefRegs), + ImplUseRegs(*ImplUseRegs), 
AllDefRegs(*AllDefRegs), + AllUseRegs(*AllUseRegs) {} + +std::unique_ptr<Instruction> +Instruction::create(const MCInstrInfo &InstrInfo, + const RegisterAliasingTrackerCache &RATC, + const BitVectorCache &BVC, unsigned Opcode) { + const llvm::MCInstrDesc *const Description = &InstrInfo.get(Opcode); + unsigned OpIndex = 0; + SmallVector<Operand, 8> Operands; + SmallVector<Variable, 4> Variables; + for (; OpIndex < Description->getNumOperands(); ++OpIndex) { + const auto &OpInfo = Description->opInfo_begin()[OpIndex]; + Operand Operand; + Operand.Index = OpIndex; + Operand.IsDef = (OpIndex < Description->getNumDefs()); + // TODO(gchatelet): Handle isLookupPtrRegClass. + if (OpInfo.RegClass >= 0) + Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass); + int TiedToIndex = Description->getOperandConstraint(OpIndex, MCOI::TIED_TO); + assert((TiedToIndex == -1 || + (0 <= TiedToIndex && + TiedToIndex < std::numeric_limits<uint8_t>::max())) && + "Unknown Operand Constraint"); + if (TiedToIndex >= 0) + Operand.TiedToIndex = TiedToIndex; + Operand.Info = &OpInfo; + Operands.push_back(Operand); + } + for (const MCPhysReg *MCPhysReg = Description->getImplicitDefs(); + MCPhysReg && *MCPhysReg; ++MCPhysReg, ++OpIndex) { + Operand Operand; + Operand.Index = OpIndex; + Operand.IsDef = true; + Operand.Tracker = &RATC.getRegister(*MCPhysReg); + Operand.ImplicitReg = MCPhysReg; + Operands.push_back(Operand); + } + for (const MCPhysReg *MCPhysReg = Description->getImplicitUses(); + MCPhysReg && *MCPhysReg; ++MCPhysReg, ++OpIndex) { + Operand Operand; + Operand.Index = OpIndex; + Operand.IsDef = false; + Operand.Tracker = &RATC.getRegister(*MCPhysReg); + Operand.ImplicitReg = MCPhysReg; + Operands.push_back(Operand); + } + Variables.reserve(Operands.size()); // Variables.size() <= Operands.size() + // Assigning Variables to non tied explicit operands. 
+ for (auto &Op : Operands) + if (Op.isExplicit() && !Op.isTied()) { + const size_t VariableIndex = Variables.size(); + assert(VariableIndex < std::numeric_limits<uint8_t>::max()); + Op.VariableIndex = VariableIndex; + Variables.emplace_back(); + Variables.back().Index = VariableIndex; + } + // Assigning Variables to tied operands. + for (auto &Op : Operands) + if (Op.isExplicit() && Op.isTied()) + Op.VariableIndex = Operands[Op.getTiedToIndex()].getVariableIndex(); + // Assigning Operands to Variables. + for (auto &Op : Operands) + if (Op.isVariable()) + Variables[Op.getVariableIndex()].TiedOperands.push_back(Op.getIndex()); + // Processing Aliasing. + BitVector ImplDefRegs = RATC.emptyRegisters(); + BitVector ImplUseRegs = RATC.emptyRegisters(); + BitVector AllDefRegs = RATC.emptyRegisters(); + BitVector AllUseRegs = RATC.emptyRegisters(); + for (const auto &Op : Operands) { + if (Op.isReg()) { + const auto &AliasingBits = Op.getRegisterAliasing().aliasedBits(); + if (Op.isDef()) + AllDefRegs |= AliasingBits; + if (Op.isUse()) + AllUseRegs |= AliasingBits; + if (Op.isDef() && Op.isImplicit()) + ImplDefRegs |= AliasingBits; + if (Op.isUse() && Op.isImplicit()) + ImplUseRegs |= AliasingBits; + } + } + // Can't use make_unique because constructor is private. 
+ return std::unique_ptr<Instruction>(new Instruction( + Description, InstrInfo.getName(Opcode), std::move(Operands), + std::move(Variables), BVC.getUnique(std::move(ImplDefRegs)), + BVC.getUnique(std::move(ImplUseRegs)), + BVC.getUnique(std::move(AllDefRegs)), + BVC.getUnique(std::move(AllUseRegs)))); +} + +const Operand &Instruction::getPrimaryOperand(const Variable &Var) const { + const auto PrimaryOperandIndex = Var.getPrimaryOperandIndex(); + assert(PrimaryOperandIndex < Operands.size()); + return Operands[PrimaryOperandIndex]; +} + +bool Instruction::hasMemoryOperands() const { + return any_of(Operands, [](const Operand &Op) { + return Op.isReg() && Op.isExplicit() && Op.isMemory(); + }); +} + +bool Instruction::hasAliasingImplicitRegisters() const { + return ImplDefRegs.anyCommon(ImplUseRegs); +} + +// Returns true if there are registers that are both in `A` and `B` but not in +// `Forbidden`. +static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B, + const BitVector &Forbidden) { + assert(A.size() == B.size() && B.size() == Forbidden.size()); + const auto Size = A.size(); + for (int AIndex = A.find_first(); AIndex != -1;) { + const int BIndex = B.find_first_in(AIndex, Size); + if (BIndex == -1) + return false; + if (AIndex == BIndex && !Forbidden.test(AIndex)) + return true; + AIndex = A.find_first_in(BIndex + 1, Size); + } + return false; +} + +bool Instruction::hasAliasingRegistersThrough( + const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const { + return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs, + ForbiddenRegisters) && + anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs, + ForbiddenRegisters); +} + +bool Instruction::hasTiedRegisters() const { + return any_of(Variables, + [](const Variable &Var) { return Var.hasTiedOperands(); }); +} + +bool Instruction::hasAliasingRegisters( + const BitVector &ForbiddenRegisters) const { + return anyCommonExcludingForbidden(AllDefRegs, 
AllUseRegs, + ForbiddenRegisters); +} + +bool Instruction::hasOneUseOrOneDef() const { + return AllDefRegs.count() || AllUseRegs.count(); +} + +void Instruction::dump(const MCRegisterInfo &RegInfo, + const RegisterAliasingTrackerCache &RATC, + raw_ostream &Stream) const { + Stream << "- " << Name << "\n"; + for (const auto &Op : Operands) { + Stream << "- Op" << Op.getIndex(); + if (Op.isExplicit()) + Stream << " Explicit"; + if (Op.isImplicit()) + Stream << " Implicit"; + if (Op.isUse()) + Stream << " Use"; + if (Op.isDef()) + Stream << " Def"; + if (Op.isImmediate()) + Stream << " Immediate"; + if (Op.isMemory()) + Stream << " Memory"; + if (Op.isReg()) { + if (Op.isImplicitReg()) + Stream << " Reg(" << RegInfo.getName(Op.getImplicitReg()) << ")"; + else + Stream << " RegClass(" + << RegInfo.getRegClassName( + &RegInfo.getRegClass(Op.Info->RegClass)) + << ")"; + } + if (Op.isTied()) + Stream << " TiedToOp" << Op.getTiedToIndex(); + Stream << "\n"; + } + for (const auto &Var : Variables) { + Stream << "- Var" << Var.getIndex(); + Stream << " ["; + bool IsFirst = true; + for (auto OperandIndex : Var.TiedOperands) { + if (!IsFirst) + Stream << ","; + Stream << "Op" << OperandIndex; + IsFirst = false; + } + Stream << "]"; + Stream << "\n"; + } + if (hasMemoryOperands()) + Stream << "- hasMemoryOperands\n"; + if (hasAliasingImplicitRegisters()) + Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n"; + if (hasTiedRegisters()) + Stream << "- hasTiedRegisters (execution is always serial)\n"; + if (hasAliasingRegisters(RATC.emptyRegisters())) + Stream << "- hasAliasingRegisters\n"; +} + +InstructionsCache::InstructionsCache(const MCInstrInfo &InstrInfo, + const RegisterAliasingTrackerCache &RATC) + : InstrInfo(InstrInfo), RATC(RATC), BVC() {} + +const Instruction &InstructionsCache::getInstr(unsigned Opcode) const { + auto &Found = Instructions[Opcode]; + if (!Found) + Found = Instruction::create(InstrInfo, RATC, BVC, Opcode); + return *Found; +} + 
+bool RegisterOperandAssignment::
+operator==(const RegisterOperandAssignment &Other) const {
+  return std::tie(Op, Reg) == std::tie(Other.Op, Other.Reg);
+}
+
+bool AliasingRegisterOperands::
+operator==(const AliasingRegisterOperands &Other) const {
+  return std::tie(Defs, Uses) == std::tie(Other.Defs, Other.Uses);
+}
+
+// Appends to OperandValues every register Operand in Operands of the requested
+// kind (Defs when SelectDef is true, Uses otherwise) for which the aliasing
+// tracker maps Reg back to a valid (non-negative) source register, paired with
+// that source register.
+static void
+addOperandIfAlias(const MCPhysReg Reg, bool SelectDef,
+                  ArrayRef<Operand> Operands,
+                  SmallVectorImpl<RegisterOperandAssignment> &OperandValues) {
+  for (const auto &Op : Operands) {
+    if (Op.isReg() && Op.isDef() == SelectDef) {
+      const int SourceReg = Op.getRegisterAliasing().getOrigin(Reg);
+      if (SourceReg >= 0)
+        OperandValues.emplace_back(&Op, SourceReg);
+    }
+  }
+}
+
+// True only when both a Def assignment and a Use assignment involve an
+// implicit operand.
+bool AliasingRegisterOperands::hasImplicitAliasing() const {
+  const auto HasImplicit = [](const RegisterOperandAssignment &ROV) {
+    return ROV.Op->isImplicit();
+  };
+  return any_of(Defs, HasImplicit) && any_of(Uses, HasImplicit);
+}
+
+bool AliasingConfigurations::empty() const { return Configurations.empty(); }
+
+bool AliasingConfigurations::hasImplicitAliasing() const {
+  return any_of(Configurations, [](const AliasingRegisterOperands &ARO) {
+    return ARO.hasImplicitAliasing();
+  });
+}
+
+// Enumerates, for every register both written by DefInstruction and read by
+// UseInstruction, the (Defs, Uses) operand assignments that realize the
+// aliasing through that register; identical configurations are deduplicated
+// via is_contained before insertion.
+AliasingConfigurations::AliasingConfigurations(
+    const Instruction &DefInstruction, const Instruction &UseInstruction) {
+  if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) {
+    auto CommonRegisters = UseInstruction.AllUseRegs;
+    CommonRegisters &= DefInstruction.AllDefRegs;
+    for (const MCPhysReg Reg : CommonRegisters.set_bits()) {
+      AliasingRegisterOperands ARO;
+      addOperandIfAlias(Reg, true, DefInstruction.Operands, ARO.Defs);
+      addOperandIfAlias(Reg, false, UseInstruction.Operands, ARO.Uses);
+      if (!ARO.Defs.empty() && !ARO.Uses.empty() &&
+          !is_contained(Configurations, ARO))
+        Configurations.push_back(std::move(ARO));
+    }
+  }
+}
+
+// Prints Op using LLVM's internal register/instruction names (not assembly).
+void DumpMCOperand(const MCRegisterInfo &MCRegisterInfo, const MCOperand &Op,
+                   raw_ostream &OS) {
+  if (!Op.isValid())
+ OS << "Invalid"; + else if (Op.isReg()) + OS << MCRegisterInfo.getName(Op.getReg()); + else if (Op.isImm()) + OS << Op.getImm(); + else if (Op.isDFPImm()) + OS << bit_cast<double>(Op.getDFPImm()); + else if (Op.isSFPImm()) + OS << bit_cast<float>(Op.getSFPImm()); + else if (Op.isExpr()) + OS << "Expr"; + else if (Op.isInst()) + OS << "SubInst"; +} + +void DumpMCInst(const MCRegisterInfo &MCRegisterInfo, + const MCInstrInfo &MCInstrInfo, const MCInst &MCInst, + raw_ostream &OS) { + OS << MCInstrInfo.getName(MCInst.getOpcode()); + for (unsigned I = 0, E = MCInst.getNumOperands(); I < E; ++I) { + if (I > 0) + OS << ','; + OS << ' '; + DumpMCOperand(MCRegisterInfo, MCInst.getOperand(I), OS); + } +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.h new file mode 100644 index 0000000000..8c7e0b2e01 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/MCInstrDescView.h @@ -0,0 +1,239 @@ +//===-- MCInstrDescView.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provide views around LLVM structures to represents an instruction instance, +/// as well as its implicit and explicit arguments in a uniform way. +/// Arguments that are explicit and independant (non tied) also have a Variable +/// associated to them so the instruction can be fully defined by reading its +/// Variables. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_MCINSTRDESCVIEW_H +#define LLVM_TOOLS_LLVM_EXEGESIS_MCINSTRDESCVIEW_H + +#include <memory> +#include <random> +#include <unordered_map> + +#include "RegisterAliasing.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +namespace llvm { +namespace exegesis { + +// A variable represents the value associated to an Operand or a set of Operands +// if they are tied together. +struct Variable { + // Returns the index of this Variable inside Instruction's Variable. + unsigned getIndex() const; + + // Returns the index of the Operand linked to this Variable. + unsigned getPrimaryOperandIndex() const; + + // Returns whether this Variable has more than one Operand linked to it. + bool hasTiedOperands() const; + + // The indices of the operands tied to this Variable. + SmallVector<unsigned, 2> TiedOperands; + + // The index of this Variable in Instruction.Variables and its associated + // Value in InstructionBuilder.VariableValues. + Optional<uint8_t> Index; +}; + +// MCOperandInfo can only represents Explicit operands. This object gives a +// uniform view of Implicit and Explicit Operands. +// - Index: can be used to refer to MCInstrDesc::operands for Explicit operands. +// - Tracker: is set for Register Operands and is used to keep track of possible +// registers and the registers reachable from them (aliasing registers). +// - Info: a shortcut for MCInstrDesc::operands()[Index]. +// - TiedToIndex: the index of the Operand holding the value or -1. +// - ImplicitReg: a pointer to the register value when Operand is Implicit, +// nullptr otherwise. +// - VariableIndex: the index of the Variable holding the value for this Operand +// or -1 if this operand is implicit. 
+struct Operand {
+  bool isExplicit() const;
+  bool isImplicit() const;
+  bool isImplicitReg() const;
+  bool isDef() const;
+  bool isUse() const;
+  bool isReg() const;
+  bool isTied() const;
+  bool isVariable() const;
+  bool isMemory() const;
+  bool isImmediate() const;
+  unsigned getIndex() const;
+  unsigned getTiedToIndex() const;
+  unsigned getVariableIndex() const;
+  unsigned getImplicitReg() const;
+  const RegisterAliasingTracker &getRegisterAliasing() const;
+  const MCOperandInfo &getExplicitOperandInfo() const;
+
+  // Please use the accessors above and not the following fields.
+  Optional<uint8_t> Index;
+  bool IsDef = false;
+  const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
+  const MCOperandInfo *Info = nullptr;              // Set for Explicit Op.
+  Optional<uint8_t> TiedToIndex;                    // Set for Reg&Explicit Op.
+  const MCPhysReg *ImplicitReg = nullptr;           // Set for Implicit Op.
+  Optional<uint8_t> VariableIndex;                  // Set for Explicit Op.
+};
+
+/// A cache of BitVector to reuse between Instructions.
+/// The cache will only be exercised during Instruction initialization.
+/// For X86, this is ~160 unique vectors for all of the ~15K Instructions.
+struct BitVectorCache {
+  // Finds or allocates the provided BitVector in the cache and retrieves its
+  // unique instance.
+  const BitVector *getUnique(BitVector &&BV) const;
+
+private:
+  mutable std::vector<std::unique_ptr<BitVector>> Cache;
+};
+
+// A view over an MCInstrDesc offering a convenient interface to compute
+// Register aliasing.
+struct Instruction {
+  // Create an instruction for a particular Opcode.
+  static std::unique_ptr<Instruction>
+  create(const MCInstrInfo &InstrInfo, const RegisterAliasingTrackerCache &RATC,
+         const BitVectorCache &BVC, unsigned Opcode);
+
+  // Prevent copy or move, instructions are allocated once and cached.
+ Instruction(const Instruction &) = delete; + Instruction(Instruction &&) = delete; + Instruction &operator=(const Instruction &) = delete; + Instruction &operator=(Instruction &&) = delete; + + // Returns the Operand linked to this Variable. + // In case the Variable is tied, the primary (i.e. Def) Operand is returned. + const Operand &getPrimaryOperand(const Variable &Var) const; + + // Whether this instruction is self aliasing through its tied registers. + // Repeating this instruction is guaranteed to executes sequentially. + bool hasTiedRegisters() const; + + // Whether this instruction is self aliasing through its implicit registers. + // Repeating this instruction is guaranteed to executes sequentially. + bool hasAliasingImplicitRegisters() const; + + // Whether this instruction is self aliasing through some registers. + // Repeating this instruction may execute sequentially by picking aliasing + // Use and Def registers. It may also execute in parallel by picking non + // aliasing Use and Def registers. + bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const; + + // Whether this instruction's registers alias with OtherInstr's registers. + bool hasAliasingRegistersThrough(const Instruction &OtherInstr, + const BitVector &ForbiddenRegisters) const; + + // Returns whether this instruction has Memory Operands. + // Repeating this instruction executes sequentially with an instruction that + // reads or write the same memory region. + bool hasMemoryOperands() const; + + // Returns whether this instruction as at least one use or one def. + // Repeating this instruction may execute sequentially by adding an + // instruction that aliases one of these. + bool hasOneUseOrOneDef() const; + + // Convenient function to help with debugging. + void dump(const MCRegisterInfo &RegInfo, + const RegisterAliasingTrackerCache &RATC, + raw_ostream &Stream) const; + + const MCInstrDesc &Description; + const StringRef Name; // The name of this instruction. 
+ const SmallVector<Operand, 8> Operands; + const SmallVector<Variable, 4> Variables; + const BitVector &ImplDefRegs; // The set of aliased implicit def registers. + const BitVector &ImplUseRegs; // The set of aliased implicit use registers. + const BitVector &AllDefRegs; // The set of all aliased def registers. + const BitVector &AllUseRegs; // The set of all aliased use registers. +private: + Instruction(const MCInstrDesc *Description, StringRef Name, + SmallVector<Operand, 8> Operands, + SmallVector<Variable, 4> Variables, const BitVector *ImplDefRegs, + const BitVector *ImplUseRegs, const BitVector *AllDefRegs, + const BitVector *AllUseRegs); +}; + +// Instructions are expensive to instantiate. This class provides a cache of +// Instructions with lazy construction. +struct InstructionsCache { + InstructionsCache(const MCInstrInfo &InstrInfo, + const RegisterAliasingTrackerCache &RATC); + + // Returns the Instruction object corresponding to this Opcode. + const Instruction &getInstr(unsigned Opcode) const; + +private: + const MCInstrInfo &InstrInfo; + const RegisterAliasingTrackerCache &RATC; + mutable std::unordered_map<unsigned, std::unique_ptr<Instruction>> + Instructions; + const BitVectorCache BVC; +}; + +// Represents the assignment of a Register to an Operand. +struct RegisterOperandAssignment { + RegisterOperandAssignment(const Operand *Operand, MCPhysReg Reg) + : Op(Operand), Reg(Reg) {} + + const Operand *Op; // Pointer to an Explicit Register Operand. + MCPhysReg Reg; + + bool operator==(const RegisterOperandAssignment &other) const; +}; + +// Represents a set of Operands that would alias through the use of some +// Registers. +// There are two reasons why operands would alias: +// - The registers assigned to each of the operands are the same or alias each +// other (e.g. AX/AL) +// - The operands are tied. +struct AliasingRegisterOperands { + SmallVector<RegisterOperandAssignment, 1> Defs; // Unlikely size() > 1. 
+ SmallVector<RegisterOperandAssignment, 2> Uses; + + // True is Defs and Use contain an Implicit Operand. + bool hasImplicitAliasing() const; + + bool operator==(const AliasingRegisterOperands &other) const; +}; + +// Returns all possible configurations leading Def registers of DefInstruction +// to alias with Use registers of UseInstruction. +struct AliasingConfigurations { + AliasingConfigurations(const Instruction &DefInstruction, + const Instruction &UseInstruction); + + bool empty() const; // True if no aliasing configuration is found. + bool hasImplicitAliasing() const; + + SmallVector<AliasingRegisterOperands, 32> Configurations; +}; + +// Writes MCInst to OS. +// This is not assembly but the internal LLVM's name for instructions and +// registers. +void DumpMCInst(const MCRegisterInfo &MCRegisterInfo, + const MCInstrInfo &MCInstrInfo, const MCInst &MCInst, + raw_ostream &OS); + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_MCINSTRDESCVIEW_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp new file mode 100644 index 0000000000..7728fcb5d6 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp @@ -0,0 +1,257 @@ +//===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ParallelSnippetGenerator.h" + +#include "BenchmarkRunner.h" +#include "MCInstrDescView.h" +#include "Target.h" + +// FIXME: Load constants into registers (e.g. with fld1) to not break +// instructions like x87. 
+ +// Ideally we would like the only limitation on executing instructions to be the +// availability of the CPU resources (e.g. execution ports) needed to execute +// them, instead of the availability of their data dependencies. + +// To achieve that, one approach is to generate instructions that do not have +// data dependencies between them. +// +// For some instructions, this is trivial: +// mov rax, qword ptr [rsi] +// mov rax, qword ptr [rsi] +// mov rax, qword ptr [rsi] +// mov rax, qword ptr [rsi] +// For the above snippet, haswell just renames rax four times and executes the +// four instructions two at a time on P23 and P0126. +// +// For some instructions, we just need to make sure that the source is +// different from the destination. For example, IDIV8r reads from GPR and +// writes to AX. We just need to ensure that the Var is assigned a +// register which is different from AX: +// idiv bx +// idiv bx +// idiv bx +// idiv bx +// The above snippet will be able to fully saturate the ports, while the same +// with ax would issue one uop every `latency(IDIV8r)` cycles. +// +// Some instructions make this harder because they both read and write from +// the same register: +// inc rax +// inc rax +// inc rax +// inc rax +// This has a data dependency from each instruction to the next, limit the +// number of instructions that can be issued in parallel. +// It turns out that this is not a big issue on recent Intel CPUs because they +// have heuristics to balance port pressure. In the snippet above, subsequent +// instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs +// might end up executing them all on P0 (just because they can), or try +// avoiding P5 because it's usually under high pressure from vector +// instructions. +// This issue is even more important for high-latency instructions because +// they increase the idle time of the CPU, e.g. 
: +// imul rax, rbx +// imul rax, rbx +// imul rax, rbx +// imul rax, rbx +// +// To avoid that, we do the renaming statically by generating as many +// independent exclusive assignments as possible (until all possible registers +// are exhausted) e.g.: +// imul rax, rbx +// imul rcx, rbx +// imul rdx, rbx +// imul r8, rbx +// +// Some instruction even make the above static renaming impossible because +// they implicitly read and write from the same operand, e.g. ADC16rr reads +// and writes from EFLAGS. +// In that case we just use a greedy register assignment and hope for the +// best. + +namespace llvm { +namespace exegesis { + +static SmallVector<const Variable *, 8> +getVariablesWithTiedOperands(const Instruction &Instr) { + SmallVector<const Variable *, 8> Result; + for (const auto &Var : Instr.Variables) + if (Var.hasTiedOperands()) + Result.push_back(&Var); + return Result; +} + +ParallelSnippetGenerator::~ParallelSnippetGenerator() = default; + +void ParallelSnippetGenerator::instantiateMemoryOperands( + const unsigned ScratchSpacePointerInReg, + std::vector<InstructionTemplate> &Instructions) const { + if (ScratchSpacePointerInReg == 0) + return; // no memory operands. 
+ const auto &ET = State.getExegesisTarget(); + const unsigned MemStep = ET.getMaxMemoryAccessSize(); + const size_t OriginalInstructionsSize = Instructions.size(); + size_t I = 0; + for (InstructionTemplate &IT : Instructions) { + ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep); + ++I; + } + + while (Instructions.size() < kMinNumDifferentAddresses) { + InstructionTemplate IT = Instructions[I % OriginalInstructionsSize]; + ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep); + ++I; + Instructions.push_back(std::move(IT)); + } + assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize && + "not enough scratch space"); +} + +static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming( + const LLVMState &State, const InstructionTemplate &IT, + const ArrayRef<const Variable *> TiedVariables, + const BitVector &ForbiddenRegisters) { + std::vector<InstructionTemplate> Instructions; + // Assign registers to variables in a round-robin manner. This is simple but + // ensures that the most register-constrained variable does not get starved. + std::vector<BitVector> PossibleRegsForVar; + for (const Variable *Var : TiedVariables) { + assert(Var); + const Operand &Op = IT.getInstr().getPrimaryOperand(*Var); + assert(Op.isReg()); + BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits(); + remove(PossibleRegs, ForbiddenRegisters); + PossibleRegsForVar.push_back(std::move(PossibleRegs)); + } + SmallVector<int, 2> Iterators(TiedVariables.size(), 0); + while (true) { + InstructionTemplate TmpIT = IT; + // Find a possible register for each variable in turn, marking the + // register as taken. + for (size_t VarId = 0; VarId < TiedVariables.size(); ++VarId) { + const int NextPossibleReg = + PossibleRegsForVar[VarId].find_next(Iterators[VarId]); + if (NextPossibleReg <= 0) { + return Instructions; + } + TmpIT.getValueFor(*TiedVariables[VarId]) = + MCOperand::createReg(NextPossibleReg); + // Bump iterator. 
+ Iterators[VarId] = NextPossibleReg; + // Prevent other variables from using the register. + for (BitVector &OtherPossibleRegs : PossibleRegsForVar) { + OtherPossibleRegs.reset(NextPossibleReg); + } + } + Instructions.push_back(std::move(TmpIT)); + } +} + +Expected<std::vector<CodeTemplate>> +ParallelSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + CodeTemplate CT; + CT.ScratchSpacePointerInReg = + Instr.hasMemoryOperands() + ? State.getExegesisTarget().getScratchMemoryRegister( + State.getTargetMachine().getTargetTriple()) + : 0; + const AliasingConfigurations SelfAliasing(Instr, Instr); + if (SelfAliasing.empty()) { + CT.Info = "instruction is parallel, repeating a random one."; + CT.Instructions.push_back(std::move(Variant)); + instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); + return getSingleton(std::move(CT)); + } + if (SelfAliasing.hasImplicitAliasing()) { + CT.Info = "instruction is serial, repeating a random one."; + CT.Instructions.push_back(std::move(Variant)); + instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); + return getSingleton(std::move(CT)); + } + const auto TiedVariables = getVariablesWithTiedOperands(Instr); + if (!TiedVariables.empty()) { + CT.Info = "instruction has tied variables, using static renaming."; + CT.Instructions = generateSnippetUsingStaticRenaming( + State, Variant, TiedVariables, ForbiddenRegisters); + instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); + return getSingleton(std::move(CT)); + } + // No tied variables, we pick random values for defs. + + // We don't want to accidentally serialize the instruction, + // so we must be sure that we don't pick a def that is an implicit use, + // or a use that is an implicit def, so record implicit regs now. 
+ BitVector ImplicitUses(State.getRegInfo().getNumRegs()); + BitVector ImplicitDefs(State.getRegInfo().getNumRegs()); + for (const auto &Op : Instr.Operands) { + if (Op.isReg() && Op.isImplicit() && !Op.isMemory()) { + assert(Op.isImplicitReg() && "Not an implicit register operand?"); + if (Op.isUse()) + ImplicitUses.set(Op.getImplicitReg()); + else { + assert(Op.isDef() && "Not a use and not a def?"); + ImplicitDefs.set(Op.getImplicitReg()); + } + } + } + const auto ImplicitUseAliases = + getAliasedBits(State.getRegInfo(), ImplicitUses); + const auto ImplicitDefAliases = + getAliasedBits(State.getRegInfo(), ImplicitDefs); + BitVector Defs(State.getRegInfo().getNumRegs()); + for (const auto &Op : Instr.Operands) { + if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) { + auto PossibleRegisters = Op.getRegisterAliasing().sourceBits(); + // Do not use forbidden registers and regs that are implicitly used. + // Note that we don't try to avoid using implicit defs explicitly. + remove(PossibleRegisters, ForbiddenRegisters); + remove(PossibleRegisters, ImplicitUseAliases); + if (!PossibleRegisters.any()) + return make_error<StringError>( + Twine("no available registers:\ncandidates:\n") + .concat(debugString(State.getRegInfo(), + Op.getRegisterAliasing().sourceBits())) + .concat("\nforbidden:\n") + .concat(debugString(State.getRegInfo(), ForbiddenRegisters)) + .concat("\nimplicit use:\n") + .concat(debugString(State.getRegInfo(), ImplicitUseAliases)), + inconvertibleErrorCode()); + const auto RandomReg = randomBit(PossibleRegisters); + Defs.set(RandomReg); + Variant.getValueFor(Op) = MCOperand::createReg(RandomReg); + } + } + // And pick random use values that are not reserved and don't alias with defs. + // Note that we don't try to avoid using implicit uses explicitly. 
+ const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs); + for (const auto &Op : Instr.Operands) { + if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) { + auto PossibleRegisters = Op.getRegisterAliasing().sourceBits(); + remove(PossibleRegisters, ForbiddenRegisters); + remove(PossibleRegisters, DefAliases); + remove(PossibleRegisters, ImplicitDefAliases); + assert(PossibleRegisters.any() && "No register left to choose from"); + const auto RandomReg = randomBit(PossibleRegisters); + Variant.getValueFor(Op) = MCOperand::createReg(RandomReg); + } + } + CT.Info = + "instruction has no tied variables picking Uses different from defs"; + CT.Instructions.push_back(std::move(Variant)); + instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); + return getSingleton(std::move(CT)); +} + +constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses; + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h new file mode 100644 index 0000000000..94eb4e26eb --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h @@ -0,0 +1,65 @@ +//===-- ParallelSnippetGenerator.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A SnippetGenerator implementation to create parallel instruction snippets. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
+
+#include "SnippetGenerator.h"
+
+namespace llvm {
+namespace exegesis {
+
+// Generates snippets that maximize instruction-level parallelism: register
+// and memory operands are picked so that instructions do not depend on each
+// other (see generateCodeTemplates in the .cpp for the selection logic).
+class ParallelSnippetGenerator : public SnippetGenerator {
+public:
+  using SnippetGenerator::SnippetGenerator;
+  ~ParallelSnippetGenerator() override;
+
+  Expected<std::vector<CodeTemplate>>
+  generateCodeTemplates(InstructionTemplate Variant,
+                        const BitVector &ForbiddenRegisters) const override;
+
+  // Minimum number of distinct memory addresses that
+  // instantiateMemoryOperands spreads memory accesses over (see below).
+  static constexpr const size_t kMinNumDifferentAddresses = 6;
+
+private:
+  // Instantiates memory operands within a snippet.
+  // To make computations as parallel as possible, we generate independent
+  // memory locations for instructions that load and store. If there are fewer
+  // than kMinNumDifferentAddresses in the original snippet, we duplicate
+  // instructions until there are this number of instructions.
+  // For example, assuming kMinNumDifferentAddresses=5 and
+  // getMaxMemoryAccessSize()=64, if the original snippet is:
+  //   mov eax, [memory]
+  // we might generate:
+  //   mov eax, [rdi]
+  //   mov eax, [rdi + 64]
+  //   mov eax, [rdi + 128]
+  //   mov eax, [rdi + 192]
+  //   mov eax, [rdi + 256]
+  // If the original snippet is:
+  //   mov eax, [memory]
+  //   add eax, [memory]
+  // we might generate:
+  //   mov eax, [rdi]
+  //   add eax, [rdi + 64]
+  //   mov eax, [rdi + 128]
+  //   add eax, [rdi + 192]
+  //   mov eax, [rdi + 256]
+  void instantiateMemoryOperands(
+      unsigned ScratchSpaceReg,
+      std::vector<InstructionTemplate> &SnippetTemplate) const;
+};
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.cpp
new file mode 100644
index 0000000000..e77980022d
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.cpp
@@ -0,0 +1,168 @@
+//===-- PerfHelper.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PerfHelper.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#ifdef HAVE_LIBPFM
+#error #include <perfmon/perf_event.h>
+#error #include <perfmon/pfmlib.h>
+#error #include <perfmon/pfmlib_perf_event.h>
+#endif
+
+#include <cassert>
+#include <cstddef>
+#include <errno.h>  // for errno
+#include <string.h> // for strerror()
+
+namespace llvm {
+namespace exegesis {
+namespace pfm {
+
+#ifdef HAVE_LIBPFM
+// True when Code is a libpfm error code (anything but PFM_SUCCESS).
+static bool isPfmError(int Code) { return Code != PFM_SUCCESS; }
+#endif
+
+// Returns true on error; without libpfm support this always reports failure.
+bool pfmInitialize() {
+#ifdef HAVE_LIBPFM
+  return isPfmError(pfm_initialize());
+#else
+  return true;
+#endif
+}
+
+void pfmTerminate() {
+#ifdef HAVE_LIBPFM
+  pfm_terminate();
+#endif
+}
+
+PerfEvent::~PerfEvent() {
+#ifdef HAVE_LIBPFM
+  // Attr is owned by this object (allocated in the StringRef constructor).
+  delete Attr;
+  ;
+#endif
+}
+
+PerfEvent::PerfEvent(PerfEvent &&Other)
+    : EventString(std::move(Other.EventString)),
+      FullQualifiedEventString(std::move(Other.FullQualifiedEventString)),
+      Attr(Other.Attr) {
+  // Transfer ownership of Attr; the moved-from object must not delete it.
+  Other.Attr = nullptr;
+}
+
+PerfEvent::PerfEvent(StringRef PfmEventString)
+    : EventString(PfmEventString.str()), Attr(nullptr) {
+#ifdef HAVE_LIBPFM
+  char *Fstr = nullptr;
+  pfm_perf_encode_arg_t Arg = {};
+  Attr = new perf_event_attr();
+  Arg.attr = Attr;
+  Arg.fstr = &Fstr;
+  Arg.size = sizeof(pfm_perf_encode_arg_t);
+  const int Result = pfm_get_os_event_encoding(EventString.c_str(), PFM_PLM3,
+                                               PFM_OS_PERF_EVENT, &Arg);
+  if (isPfmError(Result)) {
+    // We don't know beforehand which counters are available (e.g. 6 uops ports
+    // on Sandybridge but 8 on Haswell) so we report the missing counter without
+    // crashing.
+    errs() << pfm_strerror(Result) << " - cannot create event " << EventString
+           << "\n";
+  }
+  if (Fstr) {
+    FullQualifiedEventString = Fstr;
+    // libpfm allocates the fully-qualified string with malloc().
+    free(Fstr);
+  }
+#endif
+}
+
+StringRef PerfEvent::name() const { return EventString; }
+
+bool PerfEvent::valid() const { return !FullQualifiedEventString.empty(); }
+
+const perf_event_attr *PerfEvent::attribute() const { return Attr; }
+
+StringRef PerfEvent::getPfmEventString() const {
+  return FullQualifiedEventString;
+}
+
+#ifdef HAVE_LIBPFM
+Counter::Counter(PerfEvent &&E) : Event(std::move(E)){
+  assert(Event.valid());
+  const pid_t Pid = 0;    // measure current process/thread.
+  const int Cpu = -1;     // measure any processor.
+  const int GroupFd = -1; // no grouping of counters.
+  const uint32_t Flags = 0;
+  perf_event_attr AttrCopy = *Event.attribute();
+  FileDescriptor = perf_event_open(&AttrCopy, Pid, Cpu, GroupFd, Flags);
+  if (FileDescriptor == -1) {
+    errs() << "Unable to open event. ERRNO: " << strerror(errno)
+           << ". Make sure your kernel allows user "
+              "space perf monitoring.\nYou may want to try:\n$ sudo sh "
+              "-c 'echo -1 > /proc/sys/kernel/perf_event_paranoid'\n";
+  }
+  assert(FileDescriptor != -1 && "Unable to open event");
+}
+
+Counter::~Counter() { close(FileDescriptor); }
+
+void Counter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_RESET, 0); }
+
+void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); }
+
+// Convenience wrapper around readOrError(): logs failures and returns -1.
+int64_t Counter::read() const {
+  auto ValueOrError = readOrError();
+  if (ValueOrError) {
+    if (!ValueOrError.get().empty())
+      return ValueOrError.get()[0];
+    errs() << "Counter has no reading\n";
+  } else
+    errs() << ValueOrError.takeError() << "\n";
+  return -1;
+}
+
+llvm::Expected<llvm::SmallVector<int64_t, 4>>
+Counter::readOrError(StringRef /*unused*/) const {
+  int64_t Count = 0;
+  ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
+  if (ReadSize != sizeof(Count))
+    return llvm::make_error<llvm::StringError>("Failed to read event counter",
+                                               llvm::errc::io_error);
+  llvm::SmallVector<int64_t, 4> Result;
+  Result.push_back(Count);
+  return Result;
+}
+
+int Counter::numValues() const { return 1; }
+#else
+
+// Stub implementations used when libpfm is unavailable.
+Counter::Counter(PerfEvent &&Event) : Event(std::move(Event)) {}
+
+Counter::~Counter() = default;
+
+void Counter::start() {}
+
+void Counter::stop() {}
+
+// Sentinel value; no real counter is available in this configuration.
+int64_t Counter::read() const { return 42; }
+
+llvm::Expected<llvm::SmallVector<int64_t, 4>>
+Counter::readOrError(StringRef /*unused*/) const {
+  return llvm::make_error<llvm::StringError>("Not implemented",
+                                             llvm::errc::io_error);
+}
+
+int Counter::numValues() const { return 1; }
+
+#endif
+
+} // namespace pfm
+} // namespace exegesis
+} // namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.h
new file mode 100644
index 0000000000..19a35595c9
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PerfHelper.h
@@ -0,0 +1,112 @@
+//===-- PerfHelper.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Helpers for measuring perf events.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Error.h"
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+struct perf_event_attr;
+
+namespace llvm {
+namespace exegesis {
+namespace pfm {
+
+// Returns true on error.
+bool pfmInitialize();
+void pfmTerminate();
+
+// Retrieves the encoding for the event described by pfm_event_string.
+// NOTE: pfm_initialize() must be called before creating PerfEvent objects.
+class PerfEvent {
+public:
+  // http://perfmon2.sourceforge.net/manv4/libpfm.html
+  // Events are expressed as strings. e.g. "INSTRUCTION_RETIRED"
+  explicit PerfEvent(StringRef PfmEventString);
+
+  // Movable but not copyable (owns the perf_event_attr allocation).
+  PerfEvent(const PerfEvent &) = delete;
+  PerfEvent(PerfEvent &&other);
+  ~PerfEvent();
+
+  // The pfm_event_string passed at construction time.
+  StringRef name() const;
+
+  // Whether the event was successfully created.
+  bool valid() const;
+
+  // The encoded event to be passed to the Kernel.
+  const perf_event_attr *attribute() const;
+
+  // The fully qualified name for the event.
+  // e.g. "snb_ep::INSTRUCTION_RETIRED:e=0:i=0:c=0:t=0:u=1:k=0:mg=0:mh=1"
+  StringRef getPfmEventString() const;
+
+protected:
+  PerfEvent() = default;
+  std::string EventString;
+  std::string FullQualifiedEventString;
+  perf_event_attr *Attr; // owned; freed in the destructor
+};
+
+// Uses a valid PerfEvent to configure the Kernel so we can measure the
+// underlying event.
+class Counter {
+public:
+  // event: the PerfEvent to measure.
+  explicit Counter(PerfEvent &&event);
+
+  Counter(const Counter &) = delete;
+  Counter(Counter &&other) = default;
+
+  virtual ~Counter();
+
+  /// Starts the measurement of the event.
+  virtual void start();
+
+  /// Stops the measurement of the event.
+  void stop();
+
+  /// Returns the current value of the counter or -1 if it cannot be read.
+  int64_t read() const;
+
+  /// Returns the current value of the counter or error if it cannot be read.
+  /// FunctionBytes: The benchmark function being executed.
+  /// This is used to filter out the measurements to ensure they are only
+  /// within the benchmarked code.
+  /// If empty (or not specified), then no filtering will be done.
+  /// Not all counters choose to use this.
+  virtual llvm::Expected<llvm::SmallVector<int64_t, 4>>
+  readOrError(StringRef FunctionBytes = StringRef()) const;
+
+  virtual int numValues() const;
+
+protected:
+  PerfEvent Event;
+#ifdef HAVE_LIBPFM
+  int FileDescriptor = -1;
+#endif
+};
+
+} // namespace pfm
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/Target.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/Target.cpp
new file mode 100644
index 0000000000..54d42dfd22
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/Target.cpp
@@ -0,0 +1,140 @@
+//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// The PowerPC ExegesisTarget.
+//===----------------------------------------------------------------------===//
+#include "../Target.h"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+
+namespace llvm {
+namespace exegesis {
+
+// Helper to fill a memory operand with a value.
+static void setMemOp(InstructionTemplate &IT, int OpIdx,
+                     const MCOperand &OpVal) {
+  const auto Op = IT.getInstr().Operands[OpIdx];
+  assert(Op.isExplicit() && "invalid memory pattern");
+  IT.getValueFor(Op) = OpVal;
+}
+
+#include "PPCGenExegesis.inc"
+
+namespace {
+class ExegesisPowerPCTarget : public ExegesisTarget {
+public:
+  ExegesisPowerPCTarget() : ExegesisTarget(PPCCpuPfmCounters) {}
+
+private:
+  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
+                               const APInt &Value) const override;
+  bool matchesArch(Triple::ArchType Arch) const override {
+    // Only little-endian 64-bit PowerPC is supported.
+    return Arch == Triple::ppc64le;
+  }
+  unsigned getScratchMemoryRegister(const Triple &) const override;
+  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
+                          unsigned Offset) const override;
+};
+} // end anonymous namespace
+
+// Maps a register bit width to the PPC load-immediate opcode of that width.
+static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
+  switch (RegBitWidth) {
+  case 32:
+    return PPC::LI;
+  case 64:
+    return PPC::LI8;
+  }
+  llvm_unreachable("Invalid Value Width");
+}
+
+// Generates instruction to load an immediate value into a register.
+static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
+                            const APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the Register");
+  // We don't really care the value in reg, ignore the 16 bit
+  // restriction for now.
+  // TODO: make sure we get the exact value in reg if needed.
+  return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
+      .addReg(Reg)
+      .addImm(Value.getZExtValue());
+}
+
+unsigned
+ExegesisPowerPCTarget::getScratchMemoryRegister(const Triple &TT) const {
+  // R13 is reserved as Thread Pointer, we won't use threading in benchmark, so
+  // use it as scratch memory register
+  return TT.isArch64Bit() ? PPC::X13 : PPC::R13;
+}
+
+void ExegesisPowerPCTarget::fillMemoryOperands(InstructionTemplate &IT,
+                                               unsigned Reg,
+                                               unsigned Offset) const {
+  // With tied registers the first operand slot is the tied def, so the
+  // memory operands start one index later.
+  int MemOpIdx = 0;
+  if (IT.getInstr().hasTiedRegisters())
+    MemOpIdx = 1;
+  int DispOpIdx = MemOpIdx + 1;
+  const auto DispOp = IT.getInstr().Operands[DispOpIdx];
+  if (DispOp.isReg())
+    // We don't really care about the real address in snippets,
+    // So hardcode X1 for X-form Memory Operations for simplicity.
+    // TODO: materialize the offset into a register
+    setMemOp(IT, DispOpIdx, MCOperand::createReg(PPC::X1));
+  else
+    setMemOp(IT, DispOpIdx, MCOperand::createImm(Offset)); // Disp
+  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(Reg));   // BaseReg
+}
+
+std::vector<MCInst> ExegesisPowerPCTarget::setRegTo(const MCSubtargetInfo &STI,
+                                                    unsigned Reg,
+                                                    const APInt &Value) const {
+  // X11 is optional use in function linkage, should be the least used one
+  // Use it as scratch reg to load immediate.
+  unsigned ScratchImmReg = PPC::X11;
+
+  if (PPC::GPRCRegClass.contains(Reg))
+    return {loadImmediate(Reg, 32, Value)};
+  if (PPC::G8RCRegClass.contains(Reg))
+    return {loadImmediate(Reg, 64, Value)};
+  if (PPC::F4RCRegClass.contains(Reg))
+    return {loadImmediate(ScratchImmReg, 64, Value),
+            MCInstBuilder(PPC::MTVSRD).addReg(Reg).addReg(ScratchImmReg)};
+  // We don't care the real value in reg, so set 64 bits or duplicate 64 bits
+  // for simplicity.
+  // TODO: update these if we need accurate 128-bit values in registers.
+  if (PPC::VRRCRegClass.contains(Reg))
+    return {loadImmediate(ScratchImmReg, 64, Value),
+            MCInstBuilder(PPC::MTVRD).addReg(Reg).addReg(ScratchImmReg)};
+  if (PPC::VSRCRegClass.contains(Reg))
+    return {loadImmediate(ScratchImmReg, 64, Value),
+            MCInstBuilder(PPC::MTVSRDD)
+                .addReg(Reg)
+                .addReg(ScratchImmReg)
+                .addReg(ScratchImmReg)};
+  if (PPC::VFRCRegClass.contains(Reg))
+    return {loadImmediate(ScratchImmReg, 64, Value),
+            MCInstBuilder(PPC::MTVSRD).addReg(Reg).addReg(ScratchImmReg)};
+  // SPE not supported yet
+  if (PPC::SPERCRegClass.contains(Reg)) {
+    errs() << "Unsupported SPE Reg:" << Reg << "\n";
+    return {};
+  }
+  errs() << "setRegTo is not implemented, results will be unreliable:" << Reg
+         << "\n";
+  return {};
+}
+
+static ExegesisTarget *getTheExegesisPowerPCTarget() {
+  static ExegesisPowerPCTarget Target;
+  return &Target;
+}
+
+void InitializePowerPCExegesisTarget() {
+  ExegesisTarget::registerTarget(getTheExegesisPowerPCTarget());
+}
+
+} // namespace exegesis
+} // namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/ya.make b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/ya.make
new file mode 100644
index 0000000000..7f2d6ea594
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC/ya.make
@@ -0,0 +1,36 @@
+# Generated by devtools/yamaker.
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/include + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/Support + contrib/libs/llvm14/lib/Target/PowerPC + contrib/libs/llvm14/lib/Target/PowerPC/AsmParser + contrib/libs/llvm14/lib/Target/PowerPC/Disassembler + contrib/libs/llvm14/lib/Target/PowerPC/MCTargetDesc + contrib/libs/llvm14/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm14/tools/llvm-exegesis/lib +) + +ADDINCL( + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm14/lib/Target/PowerPC + contrib/libs/llvm14/lib/Target/PowerPC + contrib/libs/llvm14/tools/llvm-exegesis/lib/PowerPC +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Target.cpp +) + +END() diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.cpp new file mode 100644 index 0000000000..ee612fb0dd --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.cpp @@ -0,0 +1,92 @@ +//===-- RegisterAliasing.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterAliasing.h"
+
+namespace llvm {
+namespace exegesis {
+
+// Expands SourceBits into the set of all registers aliasing any set bit,
+// using MCRegAliasIterator (which includes the register itself).
+BitVector getAliasedBits(const MCRegisterInfo &RegInfo,
+                         const BitVector &SourceBits) {
+  BitVector AliasedBits(RegInfo.getNumRegs());
+  for (const size_t PhysReg : SourceBits.set_bits()) {
+    using RegAliasItr = MCRegAliasIterator;
+    for (auto Itr = RegAliasItr(PhysReg, &RegInfo, true); Itr.isValid();
+         ++Itr) {
+      AliasedBits.set(*Itr);
+    }
+  }
+  return AliasedBits;
+}
+
+// Private delegating constructor: sizes all bit vectors, sets no bits.
+RegisterAliasingTracker::RegisterAliasingTracker(const MCRegisterInfo &RegInfo)
+    : SourceBits(RegInfo.getNumRegs()), AliasedBits(RegInfo.getNumRegs()),
+      Origins(RegInfo.getNumRegs()) {}
+
+RegisterAliasingTracker::RegisterAliasingTracker(
+    const MCRegisterInfo &RegInfo, const BitVector &ReservedReg,
+    const MCRegisterClass &RegClass)
+    : RegisterAliasingTracker(RegInfo) {
+  for (MCPhysReg PhysReg : RegClass)
+    if (!ReservedReg[PhysReg]) // Removing reserved registers.
+      SourceBits.set(PhysReg);
+  FillOriginAndAliasedBits(RegInfo, SourceBits);
+}
+
+RegisterAliasingTracker::RegisterAliasingTracker(const MCRegisterInfo &RegInfo,
+                                                 const MCPhysReg PhysReg)
+    : RegisterAliasingTracker(RegInfo) {
+  SourceBits.set(PhysReg);
+  FillOriginAndAliasedBits(RegInfo, SourceBits);
+}
+
+// Populates AliasedBits and records, for every aliased register, which
+// source register it originates from.
+void RegisterAliasingTracker::FillOriginAndAliasedBits(
+    const MCRegisterInfo &RegInfo, const BitVector &SourceBits) {
+  using RegAliasItr = MCRegAliasIterator;
+  for (const size_t PhysReg : SourceBits.set_bits()) {
+    for (auto Itr = RegAliasItr(PhysReg, &RegInfo, true); Itr.isValid();
+         ++Itr) {
+      AliasedBits.set(*Itr);
+      Origins[*Itr] = PhysReg;
+    }
+  }
+}
+
+RegisterAliasingTrackerCache::RegisterAliasingTrackerCache(
+    const MCRegisterInfo &RegInfo, const BitVector &ReservedReg)
+    : RegInfo(RegInfo), ReservedReg(ReservedReg),
+      EmptyRegisters(RegInfo.getNumRegs()) {}
+
+// Lazily constructs and caches the tracker for a single physical register.
+const RegisterAliasingTracker &
+RegisterAliasingTrackerCache::getRegister(MCPhysReg PhysReg) const {
+  auto &Found = Registers[PhysReg];
+  if (!Found)
+    Found.reset(new RegisterAliasingTracker(RegInfo, PhysReg));
+  return *Found;
+}
+
+// Lazily constructs and caches the tracker for a register class.
+const RegisterAliasingTracker &
+RegisterAliasingTrackerCache::getRegisterClass(unsigned RegClassIndex) const {
+  auto &Found = RegisterClasses[RegClassIndex];
+  const auto &RegClass = RegInfo.getRegClass(RegClassIndex);
+  if (!Found)
+    Found.reset(new RegisterAliasingTracker(RegInfo, ReservedReg, RegClass));
+  return *Found;
+}
+
+std::string debugString(const MCRegisterInfo &RegInfo, const BitVector &Regs) {
+  std::string Result;
+  for (const unsigned Reg : Regs.set_bits()) {
+    Result.append(RegInfo.getName(Reg));
+    Result.push_back(' ');
+  }
+  return Result;
+}
+
+} // namespace exegesis
+} // namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.h
new file mode 100644
index 0000000000..b2980854ba
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterAliasing.h
@@ -0,0 +1,119 @@
+//===-- RegisterAliasingTracker.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines classes to keep track of register aliasing.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ALIASINGTRACKER_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_ALIASINGTRACKER_H
+
+#include <memory>
+#include <unordered_map>
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PackedVector.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+namespace exegesis {
+
+// Returns the registers that are aliased by the ones set in SourceBits.
+BitVector getAliasedBits(const MCRegisterInfo &RegInfo,
+                         const BitVector &SourceBits);
+
+// Keeps track of a mapping from one register (or a register class) to its
+// aliased registers.
+//
+// e.g.
+// RegisterAliasingTracker Tracker(RegInfo, X86::EAX);
+// Tracker.sourceBits() == { X86::EAX }
+// Tracker.aliasedBits() == { X86::AL, X86::AH, X86::AX,
+//                            X86::EAX,X86::HAX, X86::RAX }
+// Tracker.getOrigin(X86::AL) == X86::EAX;
+// Tracker.getOrigin(X86::BX) == -1;
+struct RegisterAliasingTracker {
+  // Construct a tracker from an MCRegisterClass.
+  RegisterAliasingTracker(const MCRegisterInfo &RegInfo,
+                          const BitVector &ReservedReg,
+                          const MCRegisterClass &RegClass);
+
+  // Construct a tracker from an MCPhysReg.
+  RegisterAliasingTracker(const MCRegisterInfo &RegInfo,
+                          const MCPhysReg Register);
+
+  const BitVector &sourceBits() const { return SourceBits; }
+
+  // Retrieves all the touched registers as a BitVector.
+  const BitVector &aliasedBits() const { return AliasedBits; }
+
+  // Returns the origin of this register or -1.
+  int getOrigin(MCPhysReg Aliased) const {
+    if (!AliasedBits[Aliased])
+      return -1;
+    return Origins[Aliased];
+  }
+
+private:
+  RegisterAliasingTracker(const MCRegisterInfo &RegInfo);
+  RegisterAliasingTracker(const RegisterAliasingTracker &) = delete;
+
+  void FillOriginAndAliasedBits(const MCRegisterInfo &RegInfo,
+                                const BitVector &OriginalBits);
+
+  BitVector SourceBits;
+  BitVector AliasedBits;
+  PackedVector<size_t, 10> Origins; // Max 1024 physical registers.
+};
+
+// A cache of existing trackers.
+struct RegisterAliasingTrackerCache {
+  // RegInfo must outlive the cache.
+  RegisterAliasingTrackerCache(const MCRegisterInfo &RegInfo,
+                               const BitVector &ReservedReg);
+
+  // Convenient function to retrieve a BitVector of the right size.
+  const BitVector &emptyRegisters() const { return EmptyRegisters; }
+
+  // Convenient function to retrieve the registers the function body can't use.
+  const BitVector &reservedRegisters() const { return ReservedReg; }
+
+  // Convenient function to retrieve the underlying MCRegInfo.
+  const MCRegisterInfo &regInfo() const { return RegInfo; }
+
+  // Retrieves the RegisterAliasingTracker for this particular register.
+  const RegisterAliasingTracker &getRegister(MCPhysReg Reg) const;
+
+  // Retrieves the RegisterAliasingTracker for this particular register class.
+  const RegisterAliasingTracker &getRegisterClass(unsigned RegClassIndex) const;
+
+private:
+  const MCRegisterInfo &RegInfo;
+  const BitVector ReservedReg;
+  const BitVector EmptyRegisters;
+  // Lazily-populated caches, hence mutable.
+  mutable std::unordered_map<unsigned, std::unique_ptr<RegisterAliasingTracker>>
+      Registers;
+  mutable std::unordered_map<unsigned, std::unique_ptr<RegisterAliasingTracker>>
+      RegisterClasses;
+};
+
+// `a = a & ~b`, optimized for few bit sets in B and no allocation.
+inline void remove(BitVector &A, const BitVector &B) {
+  assert(A.size() == B.size());
+  // Clear in A every bit that is set in B; iterating B's set bits only
+  // keeps this cheap when B is sparse.
+  for (auto I : B.set_bits())
+    A.reset(I);
+}
+
+// Returns a debug string for the list of registers.
+std::string debugString(const MCRegisterInfo &RegInfo, const BitVector &Regs);
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_ALIASINGTRACKER_H
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.cpp
new file mode 100644
index 0000000000..f881aa6d53
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.cpp
@@ -0,0 +1,51 @@
+//===-- RegisterValue.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+namespace exegesis {
+
+// Materializes the requested predefined value in the given float semantics.
+static APFloat getFloatValue(const fltSemantics &FltSemantics,
+                             PredefinedValues Value) {
+  switch (Value) {
+  case PredefinedValues::POS_ZERO:
+    return APFloat::getZero(FltSemantics);
+  case PredefinedValues::NEG_ZERO:
+    return APFloat::getZero(FltSemantics, true);
+  case PredefinedValues::ONE:
+    return APFloat(FltSemantics, "1");
+  case PredefinedValues::TWO:
+    return APFloat(FltSemantics, "2");
+  case PredefinedValues::INF:
+    return APFloat::getInf(FltSemantics);
+  case PredefinedValues::QNAN:
+    return APFloat::getQNaN(FltSemantics);
+  case PredefinedValues::SMALLEST_NORM:
+    return APFloat::getSmallestNormalized(FltSemantics);
+  case PredefinedValues::LARGEST:
+    return APFloat::getLargest(FltSemantics);
+  case PredefinedValues::ULP:
+    return APFloat::getSmallest(FltSemantics);
+  case PredefinedValues::ONE_PLUS_ULP:
+    // Start from 1.0 and step to the next representable value upward.
+    auto Output = getFloatValue(FltSemantics, PredefinedValues::ONE);
+    Output.next(false);
+    return Output;
+  }
+  llvm_unreachable("Unhandled exegesis::PredefinedValues");
+}
+
+// Returns the raw bit pattern of the predefined value as an APInt.
+APInt bitcastFloatValue(const fltSemantics &FltSemantics,
+                        PredefinedValues Value) {
+  return getFloatValue(FltSemantics, Value).bitcastToAPInt();
+}
+
+} // namespace exegesis
+} // namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.h
new file mode 100644
index 0000000000..3429783a48
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/RegisterValue.h
@@ -0,0 +1,52 @@
+//===-- RegisterValue.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// Defines a Target independent value for a Register. This is useful to explore
+/// the influence of the instruction input values on its execution time.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H
+
+#include <llvm/ADT/APFloat.h>
+#include <llvm/ADT/APInt.h>
+
+namespace llvm {
+namespace exegesis {
+
+// A simple object storing the value for a particular register.
+struct RegisterValue { + static RegisterValue zero(unsigned Reg) { return {Reg, APInt()}; } + unsigned Register; + APInt Value; +}; + +enum class PredefinedValues { + POS_ZERO, // Positive zero + NEG_ZERO, // Negative zero + ONE, // 1.0 + TWO, // 2.0 + INF, // Infinity + QNAN, // Quiet NaN + ULP, // One Unit in the last place + SMALLEST = ULP, // The minimum subnormal number + SMALLEST_NORM, // The minimum normal number + LARGEST, // The maximum normal number + ONE_PLUS_ULP, // The value just after 1.0 +}; + +APInt bitcastFloatValue(const fltSemantics &FltSemantics, + PredefinedValues Value); + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.cpp new file mode 100644 index 0000000000..03386cf238 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.cpp @@ -0,0 +1,321 @@ +//===-- SchedClassResolution.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedClassResolution.h" +#include "BenchmarkResult.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/FormatVariadic.h" +#include <limits> +#include <unordered_set> +#include <vector> + +namespace llvm { +namespace exegesis { + +// Return the non-redundant list of WriteProcRes used by the given sched class. +// The scheduling model for LLVM is such that each instruction has a certain +// number of uops which consume resources which are described by WriteProcRes +// entries. Each entry describe how many cycles are spent on a specific ProcRes +// kind. 
+// For example, an instruction might have 3 uOps, one dispatching on P0
+// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
+// Note that LLVM additionally denormalizes resource consumption to include
+// usage of super resources by subresources. So in practice if there exists a
+// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
+// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
+// by P06 are also consumed by P016. In the figure below, parenthesized cycles
+// denote implied usage of superresources by subresources:
+//            P0      P06    P016
+//     uOp1    1      (1)     (1)
+//     uOp2            1      (1)
+//     uOp3            1      (1)
+//     =============================
+//             1       3       3
+// Eventually we end up with three entries for the WriteProcRes of the
+// instruction:
+//    {ProcResIdx=1,  Cycles=1}  // P0
+//    {ProcResIdx=7,  Cycles=3}  // P06
+//    {ProcResIdx=10, Cycles=3}  // P016
+//
+// Note that in this case, P016 does not contribute any cycles, so it would
+// be removed by this function.
+// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
+static SmallVector<MCWriteProcResEntry, 8>
+getNonRedundantWriteProcRes(const MCSchedClassDesc &SCDesc,
+                            const MCSubtargetInfo &STI) {
+  SmallVector<MCWriteProcResEntry, 8> Result;
+  const auto &SM = STI.getSchedModel();
+  const unsigned NumProcRes = SM.getNumProcResourceKinds();
+
+  // This assumes that the ProcResDescs are sorted in topological order, which
+  // is guaranteed by the tablegen backend.
+  SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
+  for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
+                  *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
+       WPR != WPREnd; ++WPR) {
+    const MCProcResourceDesc *const ProcResDesc =
+        SM.getProcResource(WPR->ProcResourceIdx);
+    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
+      // This is a ProcResUnit.
+      Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
+      ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
+    } else {
+      // This is a ProcResGroup. First see if it contributes any cycles or if
+      // it has cycles just from subunits.
+      float RemainingCycles = WPR->Cycles;
+      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
+           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
+           ++SubResIdx) {
+        RemainingCycles -= ProcResUnitUsage[*SubResIdx];
+      }
+      // Small epsilon absorbs float accumulation error from the loop above.
+      if (RemainingCycles < 0.01f) {
+        // The ProcResGroup contributes no cycles of its own.
+        continue;
+      }
+      // The ProcResGroup contributes `RemainingCycles` cycles of its own.
+      Result.push_back({WPR->ProcResourceIdx,
+                        static_cast<uint16_t>(std::round(RemainingCycles))});
+      // Spread the remaining cycles over all subunits.
+      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
+           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
+           ++SubResIdx) {
+        ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
+      }
+    }
+  }
+  return Result;
+}
+
+// Distributes a pressure budget as evenly as possible on the provided subunits
+// given the already existing port pressure distribution.
+//
+// The algorithm is as follows: while there is remaining pressure to
+// distribute, find the subunits with minimal pressure, and distribute
+// remaining pressure equally up to the pressure of the unit with
+// second-to-minimal pressure.
+// For example, let's assume we want to distribute 2*P1256
+// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
+//     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
+//                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
+//     RemainingPressure = 2.0
+// We sort the subunits by pressure:
+//     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
+// We first start with the subunits with minimal pressure, which are at
+// the beginning of the sorted array. In this example there is one (P2).
+// The subunit with second-to-minimal pressure is the next one in the
+// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
+// from the budget.
+// Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)] +// RemainingPressure = 1.9 +// We repeat this process: distribute 0.2 pressure on each of the minimal +// P2 and P1, decrease budget by 2*0.2: +// Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)] +// RemainingPressure = 1.5 +// There are no second-to-minimal subunits so we just share the remaining +// budget (1.5 cycles) equally: +// Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)] +// RemainingPressure = 0.0 +// We stop as there is no remaining budget to distribute. +static void distributePressure(float RemainingPressure, + SmallVector<uint16_t, 32> Subunits, + SmallVector<float, 32> &DensePressure) { + // Find the number of subunits with minimal pressure (they are at the + // front). + sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) { + return DensePressure[A] < DensePressure[B]; + }); + const auto getPressureForSubunit = [&DensePressure, + &Subunits](size_t I) -> float & { + return DensePressure[Subunits[I]]; + }; + size_t NumMinimalSU = 1; + while (NumMinimalSU < Subunits.size() && + getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) { + ++NumMinimalSU; + } + while (RemainingPressure > 0.0f) { + if (NumMinimalSU == Subunits.size()) { + // All units are minimal, just distribute evenly and be done. + for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; + } + return; + } + // Distribute the remaining pressure equally. + const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1); + const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU); + assert(MinimalPressure < SecondToMinimalPressure); + const float Increment = SecondToMinimalPressure - MinimalPressure; + if (RemainingPressure <= NumMinimalSU * Increment) { + // There is not enough remaining pressure. 
+ for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; + } + return; + } + // Bump all minimal pressure subunits to `SecondToMinimalPressure`. + for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) = SecondToMinimalPressure; + RemainingPressure -= SecondToMinimalPressure; + } + while (NumMinimalSU < Subunits.size() && + getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) { + ++NumMinimalSU; + } + } +} + +std::vector<std::pair<uint16_t, float>> +computeIdealizedProcResPressure(const MCSchedModel &SM, + SmallVector<MCWriteProcResEntry, 8> WPRS) { + // DensePressure[I] is the port pressure for Proc Resource I. + SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds()); + sort(WPRS, [](const MCWriteProcResEntry &A, const MCWriteProcResEntry &B) { + return A.ProcResourceIdx < B.ProcResourceIdx; + }); + for (const MCWriteProcResEntry &WPR : WPRS) { + // Get units for the entry. + const MCProcResourceDesc *const ProcResDesc = + SM.getProcResource(WPR.ProcResourceIdx); + if (ProcResDesc->SubUnitsIdxBegin == nullptr) { + // This is a ProcResUnit. + DensePressure[WPR.ProcResourceIdx] += WPR.Cycles; + } else { + // This is a ProcResGroup. + SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin, + ProcResDesc->SubUnitsIdxBegin + + ProcResDesc->NumUnits); + distributePressure(WPR.Cycles, Subunits, DensePressure); + } + } + // Turn dense pressure into sparse pressure by removing zero entries. 
+ std::vector<std::pair<uint16_t, float>> Pressure; + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + if (DensePressure[I] > 0.0f) + Pressure.emplace_back(I, DensePressure[I]); + } + return Pressure; +} + +ResolvedSchedClass::ResolvedSchedClass(const MCSubtargetInfo &STI, + unsigned ResolvedSchedClassId, + bool WasVariant) + : SchedClassId(ResolvedSchedClassId), + SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)), + WasVariant(WasVariant), + NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)), + IdealizedProcResPressure(computeIdealizedProcResPressure( + STI.getSchedModel(), NonRedundantWriteProcRes)) { + assert((SCDesc == nullptr || !SCDesc->isVariant()) && + "ResolvedSchedClass should never be variant"); +} + +static unsigned ResolveVariantSchedClassId(const MCSubtargetInfo &STI, + const MCInstrInfo &InstrInfo, + unsigned SchedClassId, + const MCInst &MCI) { + const auto &SM = STI.getSchedModel(); + while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant()) { + SchedClassId = STI.resolveVariantSchedClass(SchedClassId, &MCI, &InstrInfo, + SM.getProcessorID()); + } + return SchedClassId; +} + +std::pair<unsigned /*SchedClassId*/, bool /*WasVariant*/> +ResolvedSchedClass::resolveSchedClassId(const MCSubtargetInfo &SubtargetInfo, + const MCInstrInfo &InstrInfo, + const MCInst &MCI) { + unsigned SchedClassId = InstrInfo.get(MCI.getOpcode()).getSchedClass(); + const bool WasVariant = SchedClassId && SubtargetInfo.getSchedModel() + .getSchedClassDesc(SchedClassId) + ->isVariant(); + SchedClassId = + ResolveVariantSchedClassId(SubtargetInfo, InstrInfo, SchedClassId, MCI); + return std::make_pair(SchedClassId, WasVariant); +} + +// Returns a ProxResIdx by id or name. +static unsigned findProcResIdx(const MCSubtargetInfo &STI, + const StringRef NameOrId) { + // Interpret the key as an ProcResIdx. 
+ unsigned ProcResIdx = 0; + if (to_integer(NameOrId, ProcResIdx, 10)) + return ProcResIdx; + // Interpret the key as a ProcRes name. + const auto &SchedModel = STI.getSchedModel(); + for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) { + if (NameOrId == SchedModel.getProcResource(I)->Name) + return I; + } + return 0; +} + +std::vector<BenchmarkMeasure> ResolvedSchedClass::getAsPoint( + InstructionBenchmark::ModeE Mode, const MCSubtargetInfo &STI, + ArrayRef<PerInstructionStats> Representative) const { + const size_t NumMeasurements = Representative.size(); + + std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements); + + if (Mode == InstructionBenchmark::Latency) { + assert(NumMeasurements == 1 && "Latency is a single measure."); + BenchmarkMeasure &LatencyMeasure = SchedClassPoint[0]; + + // Find the latency. + LatencyMeasure.PerInstructionValue = 0.0; + + for (unsigned I = 0; I < SCDesc->NumWriteLatencyEntries; ++I) { + const MCWriteLatencyEntry *const WLE = + STI.getWriteLatencyEntry(SCDesc, I); + LatencyMeasure.PerInstructionValue = + std::max<double>(LatencyMeasure.PerInstructionValue, WLE->Cycles); + } + } else if (Mode == InstructionBenchmark::Uops) { + for (auto I : zip(SchedClassPoint, Representative)) { + BenchmarkMeasure &Measure = std::get<0>(I); + const PerInstructionStats &Stats = std::get<1>(I); + + StringRef Key = Stats.key(); + uint16_t ProcResIdx = findProcResIdx(STI, Key); + if (ProcResIdx > 0) { + // Find the pressure on ProcResIdx `Key`. + const auto ProcResPressureIt = + llvm::find_if(IdealizedProcResPressure, + [ProcResIdx](const std::pair<uint16_t, float> &WPR) { + return WPR.first == ProcResIdx; + }); + Measure.PerInstructionValue = + ProcResPressureIt == IdealizedProcResPressure.end() + ? 
0.0 + : ProcResPressureIt->second; + } else if (Key == "NumMicroOps") { + Measure.PerInstructionValue = SCDesc->NumMicroOps; + } else { + errs() << "expected `key` to be either a ProcResIdx or a ProcRes " + "name, got " + << Key << "\n"; + return {}; + } + } + } else if (Mode == InstructionBenchmark::InverseThroughput) { + assert(NumMeasurements == 1 && "Inverse Throughput is a single measure."); + BenchmarkMeasure &RThroughputMeasure = SchedClassPoint[0]; + + RThroughputMeasure.PerInstructionValue = + MCSchedModel::getReciprocalThroughput(STI, *SCDesc); + } else { + llvm_unreachable("unimplemented measurement matching mode"); + } + + return SchedClassPoint; +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.h new file mode 100644 index 0000000000..3c7d8b3190 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SchedClassResolution.h @@ -0,0 +1,61 @@ +//===-- SchedClassResolution.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Resolution of MCInst sched class into expanded form for further analysis. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H + +#include "BenchmarkResult.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace exegesis { + +// Computes the idealized ProcRes Unit pressure. This is the expected +// distribution if the CPU scheduler can distribute the load as evenly as +// possible. +std::vector<std::pair<uint16_t, float>> +computeIdealizedProcResPressure(const MCSchedModel &SM, + SmallVector<MCWriteProcResEntry, 8> WPRS); + +// An MCSchedClassDesc augmented with some additional data. +struct ResolvedSchedClass { + ResolvedSchedClass(const MCSubtargetInfo &STI, unsigned ResolvedSchedClassId, + bool WasVariant); + + static std::pair<unsigned /*SchedClassId*/, bool /*WasVariant*/> + resolveSchedClassId(const MCSubtargetInfo &SubtargetInfo, + const MCInstrInfo &InstrInfo, const MCInst &MCI); + + std::vector<BenchmarkMeasure> + getAsPoint(InstructionBenchmark::ModeE Mode, const MCSubtargetInfo &STI, + ArrayRef<PerInstructionStats> Representative) const; + + const unsigned SchedClassId; + const MCSchedClassDesc *const SCDesc; + const bool WasVariant; // Whether the original class was variant. 
+ const SmallVector<MCWriteProcResEntry, 8> NonRedundantWriteProcRes; + const std::vector<std::pair<uint16_t, float>> IdealizedProcResPressure; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp new file mode 100644 index 0000000000..962136a1f8 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -0,0 +1,181 @@ +//===-- SerialSnippetGenerator.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SerialSnippetGenerator.h" + +#include "CodeTemplate.h" +#include "MCInstrDescView.h" +#include "Target.h" +#include <algorithm> +#include <numeric> +#include <vector> + +namespace llvm { +namespace exegesis { + +struct ExecutionClass { + ExecutionMode Mask; + const char *Description; +} static const kExecutionClasses[] = { + {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS | + ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + "Repeating a single implicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + "Repeating a single explicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_MEMORY_INSTR | + ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + "Repeating two instructions"}, +}; + +static constexpr size_t kMaxAliasingInstructions = 10; + +static std::vector<const Instruction *> +computeAliasingInstructions(const LLVMState &State, const Instruction *Instr, + size_t MaxAliasingInstructions, + const BitVector &ForbiddenRegisters) { + // Randomly iterate the set of instructions. 
+ std::vector<unsigned> Opcodes; + Opcodes.resize(State.getInstrInfo().getNumOpcodes()); + std::iota(Opcodes.begin(), Opcodes.end(), 0U); + llvm::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); + + std::vector<const Instruction *> AliasingInstructions; + for (const unsigned OtherOpcode : Opcodes) { + if (OtherOpcode == Instr->Description.getOpcode()) + continue; + const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode); + const MCInstrDesc &OtherInstrDesc = OtherInstr.Description; + // Ignore instructions that we cannot run. + if (OtherInstrDesc.isPseudo() || OtherInstrDesc.usesCustomInsertionHook() || + OtherInstrDesc.isBranch() || OtherInstrDesc.isIndirectBranch() || + OtherInstrDesc.isCall() || OtherInstrDesc.isReturn()) { + continue; + } + if (OtherInstr.hasMemoryOperands()) + continue; + if (!State.getExegesisTarget().allowAsBackToBack(OtherInstr)) + continue; + if (Instr->hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters)) + AliasingInstructions.push_back(&OtherInstr); + if (AliasingInstructions.size() >= MaxAliasingInstructions) + break; + } + return AliasingInstructions; +} + +static ExecutionMode getExecutionModes(const Instruction &Instr, + const BitVector &ForbiddenRegisters) { + ExecutionMode EM = ExecutionMode::UNKNOWN; + if (Instr.hasAliasingImplicitRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS; + if (Instr.hasTiedRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS; + if (Instr.hasMemoryOperands()) + EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; + else { + if (Instr.hasAliasingRegisters(ForbiddenRegisters)) + EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; + if (Instr.hasOneUseOrOneDef()) + EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; + } + return EM; +} + +static void appendCodeTemplates(const LLVMState &State, + InstructionTemplate Variant, + const BitVector &ForbiddenRegisters, + ExecutionMode ExecutionModeBit, + StringRef ExecutionClassDescription, + 
std::vector<CodeTemplate> &CodeTemplates) { + assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two"); + switch (ExecutionModeBit) { + case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: + // Nothing to do, the instruction is always serial. + LLVM_FALLTHROUGH; + case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: { + // Picking whatever value for the tied variable will make the instruction + // serial. + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = std::string(ExecutionClassDescription); + CT.Instructions.push_back(std::move(Variant)); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: { + // Select back-to-back memory instruction. + // TODO: Implement me. + return; + } + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { + // Making the execution of this instruction serial by selecting one def + // register to alias with one use register. + const AliasingConfigurations SelfAliasing(Variant.getInstr(), + Variant.getInstr()); + assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() && + "Instr must alias itself explicitly"); + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice Variant in the following call. + setRandomAliasing(SelfAliasing, Variant, Variant); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = std::string(ExecutionClassDescription); + CT.Instructions.push_back(std::move(Variant)); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: { + const Instruction &Instr = Variant.getInstr(); + // Select back-to-back non-memory instruction. 
+ for (const auto *OtherInstr : computeAliasingInstructions( + State, &Instr, kMaxAliasingInstructions, ForbiddenRegisters)) { + const AliasingConfigurations Forward(Instr, *OtherInstr); + const AliasingConfigurations Back(*OtherInstr, Instr); + InstructionTemplate ThisIT(Variant); + InstructionTemplate OtherIT(OtherInstr); + if (!Forward.hasImplicitAliasing()) + setRandomAliasing(Forward, ThisIT, OtherIT); + else if (!Back.hasImplicitAliasing()) + setRandomAliasing(Back, OtherIT, ThisIT); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = std::string(ExecutionClassDescription); + CT.Instructions.push_back(std::move(ThisIT)); + CT.Instructions.push_back(std::move(OtherIT)); + CodeTemplates.push_back(std::move(CT)); + } + return; + } + default: + llvm_unreachable("Unhandled enum value"); + } +} + +SerialSnippetGenerator::~SerialSnippetGenerator() = default; + +Expected<std::vector<CodeTemplate>> +SerialSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + std::vector<CodeTemplate> Results; + const ExecutionMode EM = + getExecutionModes(Variant.getInstr(), ForbiddenRegisters); + for (const auto EC : kExecutionClasses) { + for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask)) + appendCodeTemplates(State, Variant, ForbiddenRegisters, ExecutionModeBit, + EC.Description, Results); + if (!Results.empty()) + break; + } + if (Results.empty()) + return make_error<Failure>( + "No strategy found to make the execution serial"); + return std::move(Results); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.h new file mode 100644 index 0000000000..42a1ed38b5 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SerialSnippetGenerator.h @@ -0,0 +1,37 @@ +//===-- SerialSnippetGenerator.h --------------------------------*- C++ 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A SnippetGenerator implementation to create serial instruction snippets. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H + +#include "Error.h" +#include "MCInstrDescView.h" +#include "SnippetGenerator.h" + +namespace llvm { +namespace exegesis { + +class SerialSnippetGenerator : public SnippetGenerator { +public: + using SnippetGenerator::SnippetGenerator; + ~SerialSnippetGenerator() override; + + Expected<std::vector<CodeTemplate>> + generateCodeTemplates(InstructionTemplate Variant, + const BitVector &ForbiddenRegisters) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.cpp new file mode 100644 index 0000000000..9c316d60c4 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.cpp @@ -0,0 +1,181 @@ +//===-- SnippetFile.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SnippetFile.h" +#include "Error.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include <string> + +namespace llvm { +namespace exegesis { +namespace { + +// An MCStreamer that reads a BenchmarkCode definition from a file. +class BenchmarkCodeStreamer : public MCStreamer, public AsmCommentConsumer { +public: + explicit BenchmarkCodeStreamer(MCContext *Context, + const MCRegisterInfo *TheRegInfo, + BenchmarkCode *Result) + : MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {} + + // Implementation of the MCStreamer interface. We only care about + // instructions. + void emitInstruction(const MCInst &Instruction, + const MCSubtargetInfo &STI) override { + Result->Key.Instructions.push_back(Instruction); + } + + // Implementation of the AsmCommentConsumer. 
+ void HandleComment(SMLoc Loc, StringRef CommentText) override { + CommentText = CommentText.trim(); + if (!CommentText.consume_front("LLVM-EXEGESIS-")) + return; + if (CommentText.consume_front("DEFREG")) { + // LLVM-EXEGESIS-DEFREF <reg> <hex_value> + RegisterValue RegVal; + SmallVector<StringRef, 2> Parts; + CommentText.split(Parts, ' ', /*unlimited splits*/ -1, + /*do not keep empty strings*/ false); + if (Parts.size() != 2) { + errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText + << "', expected two parameters <REG> <HEX_VALUE>\n"; + ++InvalidComments; + return; + } + if (!(RegVal.Register = findRegisterByName(Parts[0].trim()))) { + errs() << "unknown register '" << Parts[0] + << "' in 'LLVM-EXEGESIS-DEFREG " << CommentText << "'\n"; + ++InvalidComments; + return; + } + const StringRef HexValue = Parts[1].trim(); + RegVal.Value = APInt( + /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16); + Result->Key.RegisterInitialValues.push_back(std::move(RegVal)); + return; + } + if (CommentText.consume_front("LIVEIN")) { + // LLVM-EXEGESIS-LIVEIN <reg> + const auto RegName = CommentText.ltrim(); + if (unsigned Reg = findRegisterByName(RegName)) + Result->LiveIns.push_back(Reg); + else { + errs() << "unknown register '" << RegName + << "' in 'LLVM-EXEGESIS-LIVEIN " << CommentText << "'\n"; + ++InvalidComments; + } + return; + } + } + + unsigned numInvalidComments() const { return InvalidComments; } + +private: + // We only care about instructions, we don't implement this part of the API. 
+ void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override {} + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { + return false; + } + void emitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) override {} + void emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, SMLoc Loc) override {} + + unsigned findRegisterByName(const StringRef RegName) const { + // FIXME: Can we do better than this ? + for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I) { + if (RegName == RegInfo->getName(I)) + return I; + } + errs() << "'" << RegName + << "' is not a valid register name for the target\n"; + return 0; + } + + const MCRegisterInfo *const RegInfo; + BenchmarkCode *const Result; + unsigned InvalidComments = 0; +}; + +} // namespace + +// Reads code snippets from file `Filename`. +Expected<std::vector<BenchmarkCode>> readSnippets(const LLVMState &State, + StringRef Filename) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferPtr.getError()) { + return make_error<Failure>("cannot read snippet: " + Filename + ": " + + EC.message()); + } + SourceMgr SM; + SM.AddNewSourceBuffer(std::move(BufferPtr.get()), SMLoc()); + + BenchmarkCode Result; + + const TargetMachine &TM = State.getTargetMachine(); + MCContext Context(TM.getTargetTriple(), TM.getMCAsmInfo(), + TM.getMCRegisterInfo(), TM.getMCSubtargetInfo()); + std::unique_ptr<MCObjectFileInfo> ObjectFileInfo( + TM.getTarget().createMCObjectFileInfo(Context, /*PIC=*/false)); + Context.setObjectFileInfo(ObjectFileInfo.get()); + Context.initInlineSourceManager(); + BenchmarkCodeStreamer Streamer(&Context, TM.getMCRegisterInfo(), &Result); + + std::string Error; + raw_string_ostream ErrorStream(Error); + formatted_raw_ostream InstPrinterOStream(ErrorStream); + const std::unique_ptr<MCInstPrinter> 
InstPrinter( + TM.getTarget().createMCInstPrinter( + TM.getTargetTriple(), TM.getMCAsmInfo()->getAssemblerDialect(), + *TM.getMCAsmInfo(), *TM.getMCInstrInfo(), *TM.getMCRegisterInfo())); + // The following call will take care of calling Streamer.setTargetStreamer. + TM.getTarget().createAsmTargetStreamer(Streamer, InstPrinterOStream, + InstPrinter.get(), + TM.Options.MCOptions.AsmVerbose); + if (!Streamer.getTargetStreamer()) + return make_error<Failure>("cannot create target asm streamer"); + + const std::unique_ptr<MCAsmParser> AsmParser( + createMCAsmParser(SM, Context, Streamer, *TM.getMCAsmInfo())); + if (!AsmParser) + return make_error<Failure>("cannot create asm parser"); + AsmParser->getLexer().setCommentConsumer(&Streamer); + + const std::unique_ptr<MCTargetAsmParser> TargetAsmParser( + TM.getTarget().createMCAsmParser(*TM.getMCSubtargetInfo(), *AsmParser, + *TM.getMCInstrInfo(), + MCTargetOptions())); + + if (!TargetAsmParser) + return make_error<Failure>("cannot create target asm parser"); + AsmParser->setTargetParser(*TargetAsmParser); + + if (AsmParser->Run(false)) + return make_error<Failure>("cannot parse asm file"); + if (Streamer.numInvalidComments()) + return make_error<Failure>(Twine("found ") + .concat(Twine(Streamer.numInvalidComments())) + .concat(" invalid LLVM-EXEGESIS comments")); + return std::vector<BenchmarkCode>{std::move(Result)}; +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.h new file mode 100644 index 0000000000..c346a047bf --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetFile.h @@ -0,0 +1,35 @@ +//===-- SnippetFile.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Utilities to read a snippet file. +/// Snippet files are just asm files with additional comments to specify which +/// registers should be defined or are live on entry. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETFILE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETFILE_H + +#include "BenchmarkCode.h" +#include "LlvmState.h" +#include "llvm/Support/Error.h" + +#include <vector> + +namespace llvm { +namespace exegesis { + +// Reads code snippets from file `Filename`. +Expected<std::vector<BenchmarkCode>> readSnippets(const LLVMState &State, + StringRef Filename); + +} // namespace exegesis +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.cpp new file mode 100644 index 0000000000..b3a7118115 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -0,0 +1,275 @@ +//===-- SnippetGenerator.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <array> +#include <string> + +#include "Assembler.h" +#include "Error.h" +#include "MCInstrDescView.h" +#include "SnippetGenerator.h" +#include "Target.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Program.h" + +namespace llvm { +namespace exegesis { + +std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT) { + std::vector<CodeTemplate> Result; + Result.push_back(std::move(CT)); + return Result; +} + +SnippetGeneratorFailure::SnippetGeneratorFailure(const Twine &S) + : StringError(S, inconvertibleErrorCode()) {} + +SnippetGenerator::SnippetGenerator(const LLVMState &State, const Options &Opts) + : State(State), Opts(Opts) {} + +SnippetGenerator::~SnippetGenerator() = default; + +Error SnippetGenerator::generateConfigurations( + const InstructionTemplate &Variant, std::vector<BenchmarkCode> &Benchmarks, + const BitVector &ExtraForbiddenRegs) const { + BitVector ForbiddenRegs = State.getRATC().reservedRegisters(); + ForbiddenRegs |= ExtraForbiddenRegs; + // If the instruction has memory registers, prevent the generator from + // using the scratch register and its aliasing registers. + if (Variant.getInstr().hasMemoryOperands()) { + const auto &ET = State.getExegesisTarget(); + unsigned ScratchSpacePointerInReg = + ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple()); + if (ScratchSpacePointerInReg == 0) + return make_error<Failure>( + "Infeasible : target does not support memory instructions"); + const auto &ScratchRegAliases = + State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits(); + // If the instruction implicitly writes to ScratchSpacePointerInReg , abort. + // FIXME: We could make a copy of the scratch register. 
+ for (const auto &Op : Variant.getInstr().Operands) { + if (Op.isDef() && Op.isImplicitReg() && + ScratchRegAliases.test(Op.getImplicitReg())) + return make_error<Failure>( + "Infeasible : memory instruction uses scratch memory register"); + } + ForbiddenRegs |= ScratchRegAliases; + } + + if (auto E = generateCodeTemplates(Variant, ForbiddenRegs)) { + MutableArrayRef<CodeTemplate> Templates = E.get(); + + // Avoid reallocations in the loop. + Benchmarks.reserve(Benchmarks.size() + Templates.size()); + for (CodeTemplate &CT : Templates) { + // TODO: Generate as many BenchmarkCode as needed. + { + BenchmarkCode BC; + BC.Info = CT.Info; + for (InstructionTemplate &IT : CT.Instructions) { + if (auto error = randomizeUnsetVariables(State, ForbiddenRegs, IT)) + return error; + BC.Key.Instructions.push_back(IT.build()); + } + if (CT.ScratchSpacePointerInReg) + BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); + BC.Key.RegisterInitialValues = + computeRegisterInitialValues(CT.Instructions); + BC.Key.Config = CT.Config; + Benchmarks.emplace_back(std::move(BC)); + if (Benchmarks.size() >= Opts.MaxConfigsPerOpcode) { + // We reached the number of allowed configs and return early. + return Error::success(); + } + } + } + return Error::success(); + } else + return E.takeError(); +} + +std::vector<RegisterValue> SnippetGenerator::computeRegisterInitialValues( + const std::vector<InstructionTemplate> &Instructions) const { + // Collect all register uses and create an assignment for each of them. + // Ignore memory operands which are handled separately. + // Loop invariant: DefinedRegs[i] is true iif it has been set at least once + // before the current instruction. + BitVector DefinedRegs = State.getRATC().emptyRegisters(); + std::vector<RegisterValue> RIV; + for (const InstructionTemplate &IT : Instructions) { + // Returns the register that this Operand sets or uses, or 0 if this is not + // a register. 
+ const auto GetOpReg = [&IT](const Operand &Op) -> unsigned { + if (Op.isMemory()) + return 0; + if (Op.isImplicitReg()) + return Op.getImplicitReg(); + if (Op.isExplicit() && IT.getValueFor(Op).isReg()) + return IT.getValueFor(Op).getReg(); + return 0; + }; + // Collect used registers that have never been def'ed. + for (const Operand &Op : IT.getInstr().Operands) { + if (Op.isUse()) { + const unsigned Reg = GetOpReg(Op); + if (Reg > 0 && !DefinedRegs.test(Reg)) { + RIV.push_back(RegisterValue::zero(Reg)); + DefinedRegs.set(Reg); + } + } + } + // Mark defs as having been def'ed. + for (const Operand &Op : IT.getInstr().Operands) { + if (Op.isDef()) { + const unsigned Reg = GetOpReg(Op); + if (Reg > 0) + DefinedRegs.set(Reg); + } + } + } + return RIV; +} + +Expected<std::vector<CodeTemplate>> +generateSelfAliasingCodeTemplates(InstructionTemplate Variant) { + const AliasingConfigurations SelfAliasing(Variant.getInstr(), + Variant.getInstr()); + if (SelfAliasing.empty()) + return make_error<SnippetGeneratorFailure>("empty self aliasing"); + std::vector<CodeTemplate> Result; + Result.emplace_back(); + CodeTemplate &CT = Result.back(); + if (SelfAliasing.hasImplicitAliasing()) { + CT.Info = "implicit Self cycles, picking random values."; + } else { + CT.Info = "explicit self cycles, selecting one aliasing Conf."; + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice Variant in the following call. 
+ setRandomAliasing(SelfAliasing, Variant, Variant); + } + CT.Instructions.push_back(std::move(Variant)); + return std::move(Result); +} + +Expected<std::vector<CodeTemplate>> +generateUnconstrainedCodeTemplates(const InstructionTemplate &Variant, + StringRef Msg) { + std::vector<CodeTemplate> Result; + Result.emplace_back(); + CodeTemplate &CT = Result.back(); + CT.Info = + std::string(formatv("{0}, repeating an unconstrained assignment", Msg)); + CT.Instructions.push_back(std::move(Variant)); + return std::move(Result); +} + +std::mt19937 &randomGenerator() { + static std::random_device RandomDevice; + static std::mt19937 RandomGenerator(RandomDevice()); + return RandomGenerator; +} + +size_t randomIndex(size_t Max) { + std::uniform_int_distribution<> Distribution(0, Max); + return Distribution(randomGenerator()); +} + +template <typename C> static decltype(auto) randomElement(const C &Container) { + assert(!Container.empty() && + "Can't pick a random element from an empty container)"); + return Container[randomIndex(Container.size() - 1)]; +} + +static void setRegisterOperandValue(const RegisterOperandAssignment &ROV, + InstructionTemplate &IB) { + assert(ROV.Op); + if (ROV.Op->isExplicit()) { + auto &AssignedValue = IB.getValueFor(*ROV.Op); + if (AssignedValue.isValid()) { + assert(AssignedValue.isReg() && AssignedValue.getReg() == ROV.Reg); + return; + } + AssignedValue = MCOperand::createReg(ROV.Reg); + } else { + assert(ROV.Op->isImplicitReg()); + assert(ROV.Reg == ROV.Op->getImplicitReg()); + } +} + +size_t randomBit(const BitVector &Vector) { + assert(Vector.any()); + auto Itr = Vector.set_bits_begin(); + for (size_t I = randomIndex(Vector.count() - 1); I != 0; --I) + ++Itr; + return *Itr; +} + +void setRandomAliasing(const AliasingConfigurations &AliasingConfigurations, + InstructionTemplate &DefIB, InstructionTemplate &UseIB) { + assert(!AliasingConfigurations.empty()); + assert(!AliasingConfigurations.hasImplicitAliasing()); + const auto &RandomConf = 
randomElement(AliasingConfigurations.Configurations); + setRegisterOperandValue(randomElement(RandomConf.Defs), DefIB); + setRegisterOperandValue(randomElement(RandomConf.Uses), UseIB); +} + +static Error randomizeMCOperand(const LLVMState &State, + const Instruction &Instr, const Variable &Var, + MCOperand &AssignedValue, + const BitVector &ForbiddenRegs) { + const Operand &Op = Instr.getPrimaryOperand(Var); + if (Op.getExplicitOperandInfo().OperandType >= + MCOI::OperandType::OPERAND_FIRST_TARGET) + return State.getExegesisTarget().randomizeTargetMCOperand( + Instr, Var, AssignedValue, ForbiddenRegs); + switch (Op.getExplicitOperandInfo().OperandType) { + case MCOI::OperandType::OPERAND_IMMEDIATE: + // FIXME: explore immediate values too. + AssignedValue = MCOperand::createImm(1); + break; + case MCOI::OperandType::OPERAND_REGISTER: { + assert(Op.isReg()); + auto AllowedRegs = Op.getRegisterAliasing().sourceBits(); + assert(AllowedRegs.size() == ForbiddenRegs.size()); + for (auto I : ForbiddenRegs.set_bits()) + AllowedRegs.reset(I); + if (!AllowedRegs.any()) + return make_error<Failure>( + Twine("no available registers:\ncandidates:\n") + .concat(debugString(State.getRegInfo(), + Op.getRegisterAliasing().sourceBits())) + .concat("\nforbidden:\n") + .concat(debugString(State.getRegInfo(), ForbiddenRegs))); + AssignedValue = MCOperand::createReg(randomBit(AllowedRegs)); + break; + } + default: + break; + } + return Error::success(); +} + +Error randomizeUnsetVariables(const LLVMState &State, + const BitVector &ForbiddenRegs, + InstructionTemplate &IT) { + for (const Variable &Var : IT.getInstr().Variables) { + MCOperand &AssignedValue = IT.getValueFor(Var); + if (!AssignedValue.isValid()) + if (auto Err = randomizeMCOperand(State, IT.getInstr(), Var, + AssignedValue, ForbiddenRegs)) + return Err; + } + return Error::success(); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.h 
b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.h new file mode 100644 index 0000000000..7a53c03547 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -0,0 +1,109 @@ +//===-- SnippetGenerator.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the abstract SnippetGenerator class for generating code that allows +/// measuring a certain property of instructions (e.g. latency). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H + +#include "Assembler.h" +#include "BenchmarkCode.h" +#include "CodeTemplate.h" +#include "LlvmState.h" +#include "MCInstrDescView.h" +#include "RegisterAliasing.h" +#include "llvm/ADT/CombinationGenerator.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Error.h" +#include <cstdlib> +#include <memory> +#include <vector> + +namespace llvm { +namespace exegesis { + +std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT); + +// Generates code templates that has a self-dependency. +Expected<std::vector<CodeTemplate>> +generateSelfAliasingCodeTemplates(InstructionTemplate Variant); + +// Generates code templates without assignment constraints. +Expected<std::vector<CodeTemplate>> +generateUnconstrainedCodeTemplates(const InstructionTemplate &Variant, + StringRef Msg); + +// A class representing failures that happened during Benchmark, they are used +// to report informations to the user. 
+class SnippetGeneratorFailure : public StringError { +public: + SnippetGeneratorFailure(const Twine &S); +}; + +// Common code for all benchmark modes. +class SnippetGenerator { +public: + struct Options { + unsigned MaxConfigsPerOpcode = 1; + }; + + explicit SnippetGenerator(const LLVMState &State, const Options &Opts); + + virtual ~SnippetGenerator(); + + // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. + Error generateConfigurations(const InstructionTemplate &Variant, + std::vector<BenchmarkCode> &Benchmarks, + const BitVector &ExtraForbiddenRegs) const; + + // Given a snippet, computes which registers the setup code needs to define. + std::vector<RegisterValue> computeRegisterInitialValues( + const std::vector<InstructionTemplate> &Snippet) const; + +protected: + const LLVMState &State; + const Options Opts; + +private: + // API to be implemented by subclasses. + virtual Expected<std::vector<CodeTemplate>> + generateCodeTemplates(InstructionTemplate Variant, + const BitVector &ForbiddenRegisters) const = 0; +}; + +// A global Random Number Generator to randomize configurations. +// FIXME: Move random number generation into an object and make it seedable for +// unit tests. +std::mt19937 &randomGenerator(); + +// Picks a random unsigned integer from 0 to Max (inclusive). +size_t randomIndex(size_t Max); + +// Picks a random bit among the bits set in Vector and returns its index. +// Precondition: Vector must have at least one bit set. +size_t randomBit(const BitVector &Vector); + +// Picks a random configuration, then selects a random def and a random use from +// it and finally set the selected values in the provided InstructionInstances. +void setRandomAliasing(const AliasingConfigurations &AliasingConfigurations, + InstructionTemplate &DefIB, InstructionTemplate &UseIB); + +// Assigns a Random Value to all Variables in IT that are still Invalid. +// Do not use any of the registers in `ForbiddenRegs`. 
+Error randomizeUnsetVariables(const LLVMState &State, + const BitVector &ForbiddenRegs, + InstructionTemplate &IT); + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SNIPPETGENERATOR_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetRepetitor.cpp new file mode 100644 index 0000000000..1851cb4674 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/SnippetRepetitor.cpp @@ -0,0 +1,133 @@ +//===-- SnippetRepetitor.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <array> +#include <string> + +#include "SnippetRepetitor.h" +#include "Target.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +namespace llvm { +namespace exegesis { +namespace { + +class DuplicateSnippetRepetitor : public SnippetRepetitor { +public: + using SnippetRepetitor::SnippetRepetitor; + + // Repeats the snippet until there are at least MinInstructions in the + // resulting code. + FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions, + unsigned LoopBodySize) const override { + return [Instructions, MinInstructions](FunctionFiller &Filler) { + auto Entry = Filler.getEntry(); + if (!Instructions.empty()) { + // Add the whole snippet at least once. + Entry.addInstructions(Instructions); + for (unsigned I = Instructions.size(); I < MinInstructions; ++I) { + Entry.addInstruction(Instructions[I % Instructions.size()]); + } + } + Entry.addReturn(); + }; + } + + BitVector getReservedRegs() const override { + // We're using no additional registers. 
//===-- SnippetRepetitor.cpp ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <array>
#include <string>

#include "SnippetRepetitor.h"
#include "Target.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

namespace llvm {
namespace exegesis {
namespace {

// Strategy that repeats the snippet by stamping copies back-to-back into a
// single basic block (no loop-maintenance instructions in the measured code).
class DuplicateSnippetRepetitor : public SnippetRepetitor {
public:
  using SnippetRepetitor::SnippetRepetitor;

  // Repeats the snippet until there are at least MinInstructions in the
  // resulting code. LoopBodySize is unused by this strategy.
  FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
                      unsigned LoopBodySize) const override {
    return [Instructions, MinInstructions](FunctionFiller &Filler) {
      auto Entry = Filler.getEntry();
      if (!Instructions.empty()) {
        // Add the whole snippet at least once.
        Entry.addInstructions(Instructions);
        // Then keep cycling through the snippet until the budget is reached.
        for (unsigned I = Instructions.size(); I < MinInstructions; ++I) {
          Entry.addInstruction(Instructions[I % Instructions.size()]);
        }
      }
      Entry.addReturn();
    };
  }

  BitVector getReservedRegs() const override {
    // We're using no additional registers.
    return State.getRATC().emptyRegisters();
  }
};

// Strategy that wraps the snippet in a counted loop; cheaper code size than
// duplication, at the cost of loop-counter/branch overhead in the measurement.
class LoopSnippetRepetitor : public SnippetRepetitor {
public:
  explicit LoopSnippetRepetitor(const LLVMState &State)
      : SnippetRepetitor(State),
        LoopCounter(State.getExegesisTarget().getLoopCounterRegister(
            State.getTargetMachine().getTargetTriple())) {}

  // Loop over the snippet ceil(MinInstructions / Instructions.Size()) times.
  FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
                      unsigned LoopBodySize) const override {
    return [this, Instructions, MinInstructions,
            LoopBodySize](FunctionFiller &Filler) {
      const auto &ET = State.getExegesisTarget();
      auto Entry = Filler.getEntry();
      auto Loop = Filler.addBasicBlock();
      auto Exit = Filler.addBasicBlock();

      // Unroll the loop body so that it holds at least LoopBodySize
      // instructions (when the snippet alone is smaller).
      const unsigned LoopUnrollFactor =
          LoopBodySize <= Instructions.size()
              ? 1
              : divideCeil(LoopBodySize, Instructions.size());
      assert(LoopUnrollFactor >= 1 && "Should end up with at least 1 snippet.");

      // Set loop counter to the right value:
      const APInt LoopCount(
          32,
          divideCeil(MinInstructions, LoopUnrollFactor * Instructions.size()));
      assert(LoopCount.uge(1) && "Trip count should be at least 1.");
      for (const MCInst &Inst :
           ET.setRegTo(State.getSubtargetInfo(), LoopCounter, LoopCount))
        Entry.addInstruction(Inst);

      // Set up the loop basic block.
      Entry.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
      Loop.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
      // The live ins are: the loop counter, the registers that were setup by
      // the entry block, and entry block live ins.
      Loop.MBB->addLiveIn(LoopCounter);
      for (unsigned Reg : Filler.getRegistersSetUp())
        Loop.MBB->addLiveIn(Reg);
      for (const auto &LiveIn : Entry.MBB->liveins())
        Loop.MBB->addLiveIn(LiveIn);
      // Emit LoopUnrollFactor copies of the snippet inside the loop body.
      for (auto _ : seq(0U, LoopUnrollFactor)) {
        (void)_;
        Loop.addInstructions(Instructions);
      }
      // Back-edge: decrement the counter and branch back to the loop block.
      ET.decrementLoopCounterAndJump(*Loop.MBB, *Loop.MBB,
                                     State.getInstrInfo());

      // Set up the exit basic block.
      Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
      Exit.addReturn();
    };
  }

  BitVector getReservedRegs() const override {
    // We're using a single loop counter, but we have to reserve all aliasing
    // registers.
    return State.getRATC().getRegister(LoopCounter).aliasedBits();
  }

private:
  const unsigned LoopCounter;
};

} // namespace

SnippetRepetitor::~SnippetRepetitor() {}

std::unique_ptr<const SnippetRepetitor>
SnippetRepetitor::Create(InstructionBenchmark::RepetitionModeE Mode,
                         const LLVMState &State) {
  switch (Mode) {
  case InstructionBenchmark::Duplicate:
    return std::make_unique<DuplicateSnippetRepetitor>(State);
  case InstructionBenchmark::Loop:
    return std::make_unique<LoopSnippetRepetitor>(State);
  case InstructionBenchmark::AggregateMin:
    // AggregateMin is handled by the caller combining other repetitors; it is
    // not a repetition strategy by itself.
    break;
  }
  llvm_unreachable("Unknown RepetitionModeE enum");
}

} // namespace exegesis
} // namespace llvm
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines helpers to fill functions with repetitions of a snippet. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_FUNCTIONFILLER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_FUNCTIONFILLER_H + +#include "Assembler.h" +#include "BenchmarkResult.h" +#include "LlvmState.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Object/Binary.h" + +namespace llvm { +namespace exegesis { + +class SnippetRepetitor { +public: + static std::unique_ptr<const SnippetRepetitor> + Create(InstructionBenchmark::RepetitionModeE Mode, const LLVMState &State); + + virtual ~SnippetRepetitor(); + + // Returns the set of registers that are reserved by the repetitor. + virtual BitVector getReservedRegs() const = 0; + + // Returns a functor that repeats `Instructions` so that the function executes + // at least `MinInstructions` instructions. + virtual FillFunction Repeat(ArrayRef<MCInst> Instructions, + unsigned MinInstructions, + unsigned LoopBodySize) const = 0; + + explicit SnippetRepetitor(const LLVMState &State) : State(State) {} + +protected: + const LLVMState &State; +}; + +} // namespace exegesis +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/Target.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Target.cpp new file mode 100644 index 0000000000..9ff19d57a8 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/Target.cpp @@ -0,0 +1,177 @@ +//===-- Target.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Target.h"

#include "LatencyBenchmarkRunner.h"
#include "ParallelSnippetGenerator.h"
#include "SerialSnippetGenerator.h"
#include "UopsBenchmarkRunner.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"

namespace llvm {
namespace exegesis {

ExegesisTarget::~ExegesisTarget() {} // anchor.

// Head of the intrusive singly-linked list of registered targets; each
// ExegesisTarget carries the `Next` pointer (see registerTarget()).
static ExegesisTarget *FirstTarget = nullptr;

// Walks the registration list and returns the first target whose
// matchesArch() accepts TT's architecture, or nullptr if none does.
const ExegesisTarget *ExegesisTarget::lookup(Triple TT) {
  for (const ExegesisTarget *T = FirstTarget; T != nullptr; T = T->Next) {
    if (T->matchesArch(TT.getArch()))
      return T;
  }
  return nullptr;
}

// Default counter factory: creates a libpfm counter for CounterName, failing
// with a descriptive error if the event is not valid on this host.
Expected<std::unique_ptr<pfm::Counter>>
ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &) const {
  pfm::PerfEvent Event(CounterName);
  if (!Event.valid())
    return llvm::make_error<Failure>(
        llvm::Twine("Unable to create counter with name '")
            .concat(CounterName)
            .concat("'"));

  return std::make_unique<pfm::Counter>(std::move(Event));
}

// Prepends Target to the registration list. Not thread safe; idempotent for a
// target that is already linked in.
void ExegesisTarget::registerTarget(ExegesisTarget *Target) {
  if (FirstTarget == nullptr) {
    FirstTarget = Target;
    return;
  }
  if (Target->Next != nullptr)
    return; // Already registered.
  Target->Next = FirstTarget;
  FirstTarget = Target;
}

// Selects the snippet-generation strategy for the benchmark mode: serial
// (dependency chains) for latency, parallel for uops/inverse-throughput.
std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
    InstructionBenchmark::ModeE Mode, const LLVMState &State,
    const SnippetGenerator::Options &Opts) const {
  switch (Mode) {
  case InstructionBenchmark::Unknown:
    return nullptr;
  case InstructionBenchmark::Latency:
    return createSerialSnippetGenerator(State, Opts);
  case InstructionBenchmark::Uops:
  case InstructionBenchmark::InverseThroughput:
    return createParallelSnippetGenerator(State, Opts);
  }
  return nullptr;
}

// Builds the benchmark runner for Mode, first verifying that the scheduling
// model declares the perf counters the mode needs.
Expected<std::unique_ptr<BenchmarkRunner>>
ExegesisTarget::createBenchmarkRunner(
    InstructionBenchmark::ModeE Mode, const LLVMState &State,
    InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
  PfmCountersInfo PfmCounters = State.getPfmCounters();
  switch (Mode) {
  case InstructionBenchmark::Unknown:
    return nullptr;
  case InstructionBenchmark::Latency:
  case InstructionBenchmark::InverseThroughput:
    if (!PfmCounters.CycleCounter) {
      const char *ModeName = Mode == InstructionBenchmark::Latency
                                 ? "latency"
                                 : "inverse_throughput";
      return make_error<Failure>(
          Twine("can't run '")
              .concat(ModeName)
              .concat("' mode, sched model does not define a cycle counter."));
    }
    return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
  case InstructionBenchmark::Uops:
    if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
      return make_error<Failure>("can't run 'uops' mode, sched model does not "
                                 "define uops or issue counters.");
    return createUopsBenchmarkRunner(State, ResultAggMode);
  }
  return nullptr;
}

std::unique_ptr<SnippetGenerator> ExegesisTarget::createSerialSnippetGenerator(
    const LLVMState &State, const SnippetGenerator::Options &Opts) const {
  return std::make_unique<SerialSnippetGenerator>(State, Opts);
}

std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator(
    const LLVMState &State, const SnippetGenerator::Options &Opts) const {
  return std::make_unique<ParallelSnippetGenerator>(State, Opts);
}

std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
    const LLVMState &State, InstructionBenchmark::ModeE Mode,
    InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
  return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
}

std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
    const LLVMState &State,
    InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
  return std::make_unique<UopsBenchmarkRunner>(State);
}

// NOTE(review): std::is_pod is deprecated since C++20; upstream later spells
// this with is_trivially_copyable/standard_layout — confirm toolchain mode.
static_assert(std::is_pod<PfmCountersInfo>::value,
              "We shouldn't have dynamic initialization here");
const PfmCountersInfo PfmCountersInfo::Default = {nullptr, nullptr, nullptr,
                                                  0u};

// Looks up the pfm counter description for CpuName in the (sorted) per-target
// table; falls back to the table's default entry (empty CPU name) or, failing
// that, to PfmCountersInfo::Default.
const PfmCountersInfo &ExegesisTarget::getPfmCounters(StringRef CpuName) const {
  assert(llvm::is_sorted(
             CpuPfmCounters,
             [](const CpuAndPfmCounters &LHS, const CpuAndPfmCounters &RHS) {
               return strcmp(LHS.CpuName, RHS.CpuName) < 0;
             }) &&
         "CpuPfmCounters table is not sorted");

  // Find entry
  auto Found = llvm::lower_bound(CpuPfmCounters, CpuName);
  if (Found == CpuPfmCounters.end() || StringRef(Found->CpuName) != CpuName) {
    // Use the default.
    if (!CpuPfmCounters.empty() && CpuPfmCounters.begin()->CpuName[0] == '\0') {
      Found = CpuPfmCounters.begin(); // The target specifies a default.
    } else {
      return PfmCountersInfo::Default; // No default for the target.
    }
  }
  assert(Found->PCI && "Missing counters");
  return *Found->PCI;
}

ExegesisTarget::SavedState::~SavedState() {} // anchor.

namespace {

// Default implementation.
class ExegesisDefaultTarget : public ExegesisTarget {
public:
  ExegesisDefaultTarget() : ExegesisTarget({}) {}

private:
  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                               const APInt &Value) const override {
    llvm_unreachable("Not yet implemented");
  }

  bool matchesArch(Triple::ArchType Arch) const override {
    llvm_unreachable("never called");
    return false;
  }
};

} // namespace

const ExegesisTarget &ExegesisTarget::getDefault() {
  static ExegesisDefaultTarget Target;
  return Target;
}

} // namespace exegesis
} // namespace llvm
//===-- Target.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// Classes that handle the creation of target-specific objects. This is
/// similar to Target/TargetRegistry.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
#define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H

#include "BenchmarkResult.h"
#include "BenchmarkRunner.h"
#include "Error.h"
#include "LlvmState.h"
#include "PerfHelper.h"
#include "SnippetGenerator.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Error.h"

namespace llvm {
namespace exegesis {

// Static description of the perf counters available for a CPU, as generated
// by the target's .td files. Field order matters: instances are created with
// aggregate initialization (see PfmCountersInfo::Default).
struct PfmCountersInfo {
  // An optional name of a performance counter that can be used to measure
  // cycles.
  const char *CycleCounter;

  // An optional name of a performance counter that can be used to measure
  // uops.
  const char *UopsCounter;

  // An IssueCounter specifies how to measure uops issued to specific proc
  // resources.
  struct IssueCounter {
    const char *Counter;
    // The name of the ProcResource that this counter measures.
    const char *ProcResName;
  };
  // An optional list of IssueCounters.
  const IssueCounter *IssueCounters;
  unsigned NumIssueCounters;

  static const PfmCountersInfo Default;
};

// Associates a CPU name with its counter description; kept sorted by CpuName
// so the table can be binary-searched (see ExegesisTarget::getPfmCounters).
struct CpuAndPfmCounters {
  const char *CpuName;
  const PfmCountersInfo *PCI;
  bool operator<(StringRef S) const { return StringRef(CpuName) < S; }
};

class ExegesisTarget {
public:
  explicit ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters)
      : CpuPfmCounters(CpuPfmCounters) {}

  // Targets can use this to create target-specific perf counters.
  virtual Expected<std::unique_ptr<pfm::Counter>>
  createCounter(StringRef CounterName, const LLVMState &State) const;

  // Targets can use this to add target-specific passes in assembleToStream();
  virtual void addTargetSpecificPasses(PassManagerBase &PM) const {}

  // Generates code to move a constant into the given register.
  // Precondition: Value must fit into Reg.
  virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                                       const APInt &Value) const = 0;

  // Returns the register pointing to scratch memory, or 0 if this target
  // does not support memory operands. The benchmark function uses the
  // default calling convention.
  virtual unsigned getScratchMemoryRegister(const Triple &) const { return 0; }

  // Fills memory operands with references to the address at [Reg] + Offset.
  virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                                  unsigned Offset) const {
    llvm_unreachable(
        "fillMemoryOperands() requires getScratchMemoryRegister() > 0");
  }

  // Returns a counter usable as a loop counter.
  virtual unsigned getLoopCounterRegister(const Triple &) const { return 0; }

  // Adds the code to decrement the loop counter and, while it is not zero,
  // jump back to TargetMBB.
  virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                           MachineBasicBlock &TargetMBB,
                                           const MCInstrInfo &MII) const {
    llvm_unreachable("decrementLoopCounterAndBranch() requires "
                     "getLoopCounterRegister() > 0");
  }

  // Returns a list of unavailable registers.
  // Targets can use this to prevent some registers from being automatically
  // selected for use in snippets.
  virtual ArrayRef<unsigned> getUnavailableRegisters() const { return {}; }

  // Returns the maximum number of bytes a load/store instruction can access at
  // once. This is typically the size of the largest register available on the
  // processor. Note that this only used as a hint to generate independent
  // load/stores to/from memory, so the exact returned value does not really
  // matter as long as it's large enough.
  virtual unsigned getMaxMemoryAccessSize() const { return 0; }

  // Assigns a random operand of the right type to variable Var.
  // The target is responsible for handling any operand starting from
  // OPERAND_FIRST_TARGET.
  virtual Error randomizeTargetMCOperand(const Instruction &Instr,
                                         const Variable &Var,
                                         MCOperand &AssignedValue,
                                         const BitVector &ForbiddenRegs) const {
    return make_error<Failure>(
        "targets with target-specific operands should implement this");
  }

  // Returns true if this instruction is supported as a back-to-back
  // instructions.
  // FIXME: Eventually we should discover this dynamically.
  virtual bool allowAsBackToBack(const Instruction &Instr) const {
    return true;
  }

  // For some instructions, it is interesting to measure how its performance
  // characteristics differ depending on its operands.
  // This allows us to produce all the interesting variants.
  virtual std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const {
    // By default, we're happy with whatever randomizer will give us.
    return {&Instr};
  }

  // Checks hardware and software support for current benchmark mode.
  // Returns an error if the target host does not have support to run the
  // benchmark.
  virtual Error checkFeatureSupport() const { return Error::success(); }

  // Creates a snippet generator for the given mode.
  std::unique_ptr<SnippetGenerator>
  createSnippetGenerator(InstructionBenchmark::ModeE Mode,
                         const LLVMState &State,
                         const SnippetGenerator::Options &Opts) const;
  // Creates a benchmark runner for the given mode.
  Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
      InstructionBenchmark::ModeE Mode, const LLVMState &State,
      InstructionBenchmark::ResultAggregationModeE ResultAggMode =
          InstructionBenchmark::Min) const;

  // Returns the ExegesisTarget for the given triple or nullptr if the target
  // does not exist.
  static const ExegesisTarget *lookup(Triple TT);
  // Returns the default (unspecialized) ExegesisTarget.
  static const ExegesisTarget &getDefault();
  // Registers a target. Not thread safe.
  static void registerTarget(ExegesisTarget *T);

  virtual ~ExegesisTarget();

  // Returns the Pfm counters for the given CPU (or the default if no pfm
  // counters are defined for this CPU).
  const PfmCountersInfo &getPfmCounters(StringRef CpuName) const;

  // Saves the CPU state that needs to be preserved when running a benchmark,
  // and returns an RAII object that restores the state on destruction.
  // By default no state is preserved.
  struct SavedState {
    virtual ~SavedState();
  };
  virtual std::unique_ptr<SavedState> withSavedState() const {
    return std::make_unique<SavedState>();
  }

private:
  virtual bool matchesArch(Triple::ArchType Arch) const = 0;

  // Targets can implement their own snippet generators/benchmarks runners by
  // implementing these.
  std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator(
      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
      const LLVMState &State, InstructionBenchmark::ModeE Mode,
      InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
  std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
      const LLVMState &State,
      InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;

  // Next element in the intrusive registration list (see Target.cpp).
  const ExegesisTarget *Next = nullptr;
  const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
};

} // namespace exegesis
} // namespace llvm

#endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Utilities to handle the creation of the native exegesis target. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_SELECT_H +#define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_SELECT_H + +namespace llvm { +namespace exegesis { + +#ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET +void LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); +#endif + +// Initializes the native exegesis target, or returns false if there is no +// native target (either because llvm-exegesis does not support the target or +// because it's not linked in). +inline bool InitializeNativeExegesisTarget() { +#ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET + LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET(); + return true; +#else + return false; +#endif +} + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_SELECT_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp new file mode 100644 index 0000000000..b99b1c5e71 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp @@ -0,0 +1,46 @@ +//===-- UopsBenchmarkRunner.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UopsBenchmarkRunner.h" + +#include "Target.h" + +namespace llvm { +namespace exegesis { + +UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; + +Expected<std::vector<BenchmarkMeasure>> +UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const { + std::vector<BenchmarkMeasure> Result; + const PfmCountersInfo &PCI = State.getPfmCounters(); + // Uops per port. + for (const auto *IssueCounter = PCI.IssueCounters, + *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; + IssueCounter != IssueCounterEnd; ++IssueCounter) { + if (!IssueCounter->Counter) + continue; + auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName, + *ExpectedCounterValue)); + } + // NumMicroOps. + if (const char *const UopsCounter = PCI.UopsCounter) { + auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back( + BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue)); + } + return std::move(Result); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h new file mode 100644 index 0000000000..cda74eb453 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h @@ -0,0 +1,38 @@ +//===-- UopsBenchmarkRunner.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure uop decomposition. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H + +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +class UopsBenchmarkRunner : public BenchmarkRunner { +public: + UopsBenchmarkRunner(const LLVMState &State) + : BenchmarkRunner(State, InstructionBenchmark::Uops) {} + ~UopsBenchmarkRunner() override; + + static constexpr const size_t kMinNumDifferentAddresses = 6; + +private: + Expected<std::vector<BenchmarkMeasure>> + runMeasurements(const FunctionExecutor &Executor) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/Target.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/Target.cpp new file mode 100644 index 0000000000..7188d8fafe --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/Target.cpp @@ -0,0 +1,970 @@ +//===-- Target.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../Target.h" + +#include "../Error.h" +#include "../ParallelSnippetGenerator.h" +#include "../SerialSnippetGenerator.h" +#include "../SnippetGenerator.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86.h" +#include "X86Counter.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Host.h" + +#include <memory> +#include <string> +#include <vector> +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#include <float.h> +#include <immintrin.h> +#include <intrin.h> +#endif + +namespace llvm { +namespace exegesis { + +static cl::OptionCategory + BenchmarkOptions("llvm-exegesis benchmark x86-options"); + +// If a positive value is specified, we are going to use the LBR in +// latency-mode. +// +// Note: +// - A small value is preferred, but too low a value could result in +// throttling. +// - A prime number is preferred to avoid always skipping certain blocks. +// +static cl::opt<unsigned> LbrSamplingPeriod( + "x86-lbr-sample-period", + cl::desc("The sample period (nbranches/sample), used for LBR sampling"), + cl::cat(BenchmarkOptions), cl::init(0)); + +// FIXME: Validates that repetition-mode is loop if LBR is requested. + +// Returns a non-null reason if we cannot handle the memory references in this +// instruction. +static const char *isInvalidMemoryInstr(const Instruction &Instr) { + switch (Instr.Description.TSFlags & X86II::FormMask) { + default: + return "Unknown FormMask value"; + // These have no memory access. 
+ case X86II::Pseudo: + case X86II::RawFrm: + case X86II::AddCCFrm: + case X86II::PrefixByte: + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + case X86II::MRMSrcReg4VOp3: + case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: + case X86II::MRMXrCC: + case X86II::MRMr0: + case X86II::MRMXr: + case X86II::MRM0r: + case X86II::MRM1r: + case X86II::MRM2r: + case X86II::MRM3r: + case X86II::MRM4r: + case X86II::MRM5r: + case X86II::MRM6r: + case X86II::MRM7r: + case X86II::MRM0X: + case X86II::MRM1X: + case X86II::MRM2X: + case X86II::MRM3X: + case X86II::MRM4X: + case X86II::MRM5X: + case X86II::MRM6X: + case X86II::MRM7X: + case X86II::MRM_C0: + case X86II::MRM_C1: + case X86II::MRM_C2: + case X86II::MRM_C3: + case X86II::MRM_C4: + case X86II::MRM_C5: + case X86II::MRM_C6: + case X86II::MRM_C7: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_CA: + case X86II::MRM_CB: + case X86II::MRM_CC: + case X86II::MRM_CD: + case X86II::MRM_CE: + case X86II::MRM_CF: + case X86II::MRM_D0: + case X86II::MRM_D1: + case X86II::MRM_D2: + case X86II::MRM_D3: + case X86II::MRM_D4: + case X86II::MRM_D5: + case X86II::MRM_D6: + case X86II::MRM_D7: + case X86II::MRM_D8: + case X86II::MRM_D9: + case X86II::MRM_DA: + case X86II::MRM_DB: + case X86II::MRM_DC: + case X86II::MRM_DD: + case X86II::MRM_DE: + case X86II::MRM_DF: + case X86II::MRM_E0: + case X86II::MRM_E1: + case X86II::MRM_E2: + case X86II::MRM_E3: + case X86II::MRM_E4: + case X86II::MRM_E5: + case X86II::MRM_E6: + case X86II::MRM_E7: + case X86II::MRM_E8: + case X86II::MRM_E9: + case X86II::MRM_EA: + case X86II::MRM_EB: + case X86II::MRM_EC: + case X86II::MRM_ED: + case X86II::MRM_EE: + case X86II::MRM_EF: + case X86II::MRM_F0: + case X86II::MRM_F1: + case X86II::MRM_F2: + case X86II::MRM_F3: + case X86II::MRM_F4: + case X86II::MRM_F5: + case X86II::MRM_F6: + case X86II::MRM_F7: + case X86II::MRM_F8: + case X86II::MRM_F9: + case X86II::MRM_FA: + case X86II::MRM_FB: + case X86II::MRM_FC: + case X86II::MRM_FD: + 
case X86II::MRM_FE: + case X86II::MRM_FF: + case X86II::RawFrmImm8: + return nullptr; + case X86II::AddRegFrm: + return (Instr.Description.Opcode == X86::POP16r || + Instr.Description.Opcode == X86::POP32r || + Instr.Description.Opcode == X86::PUSH16r || + Instr.Description.Opcode == X86::PUSH32r) + ? "unsupported opcode: unsupported memory access" + : nullptr; + // These access memory and are handled. + case X86II::MRMDestMem: + case X86II::MRMSrcMem: + case X86II::MRMSrcMem4VOp3: + case X86II::MRMSrcMemOp4: + case X86II::MRMSrcMemCC: + case X86II::MRMXmCC: + case X86II::MRMXm: + case X86II::MRM0m: + case X86II::MRM1m: + case X86II::MRM2m: + case X86II::MRM3m: + case X86II::MRM4m: + case X86II::MRM5m: + case X86II::MRM6m: + case X86II::MRM7m: + return nullptr; + // These access memory and are not handled yet. + case X86II::RawFrmImm16: + case X86II::RawFrmMemOffs: + case X86II::RawFrmSrc: + case X86II::RawFrmDst: + case X86II::RawFrmDstSrc: + return "unsupported opcode: non uniform memory access"; + } +} + +// If the opcode is invalid, returns a pointer to a character literal indicating +// the reason. nullptr indicates a valid opcode. 
+static const char *isInvalidOpcode(const Instruction &Instr) { + const auto OpcodeName = Instr.Name; + if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo) + return "unsupported opcode: pseudo instruction"; + if ((OpcodeName.startswith("POP") && !OpcodeName.startswith("POPCNT")) || + OpcodeName.startswith("PUSH") || OpcodeName.startswith("ADJCALLSTACK") || + OpcodeName.startswith("LEAVE")) + return "unsupported opcode: Push/Pop/AdjCallStack/Leave"; + switch (Instr.Description.Opcode) { + case X86::LFS16rm: + case X86::LFS32rm: + case X86::LFS64rm: + case X86::LGS16rm: + case X86::LGS32rm: + case X86::LGS64rm: + case X86::LSS16rm: + case X86::LSS32rm: + case X86::LSS64rm: + case X86::SYSENTER: + return "unsupported opcode"; + default: + break; + } + if (const auto reason = isInvalidMemoryInstr(Instr)) + return reason; + // We do not handle instructions with OPERAND_PCREL. + for (const Operand &Op : Instr.Operands) + if (Op.isExplicit() && + Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL) + return "unsupported opcode: PC relative operand"; + // We do not handle second-form X87 instructions. We only handle first-form + // ones (_Fp), see comment in X86InstrFPStack.td. + for (const Operand &Op : Instr.Operands) + if (Op.isReg() && Op.isExplicit() && + Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID) + return "unsupported second-form X87 instruction"; + return nullptr; +} + +static unsigned getX86FPFlags(const Instruction &Instr) { + return Instr.Description.TSFlags & X86II::FPTypeMask; +} + +// Helper to fill a memory operand with a value. +static void setMemOp(InstructionTemplate &IT, int OpIdx, + const MCOperand &OpVal) { + const auto Op = IT.getInstr().Operands[OpIdx]; + assert(Op.isExplicit() && "invalid memory pattern"); + IT.getValueFor(Op) = OpVal; +} + +// Common (latency, uops) code for LEA templates. `GetDestReg` takes the +// addressing base and index registers and returns the LEA destination register. 
+static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon( + const Instruction &Instr, const BitVector &ForbiddenRegisters, + const LLVMState &State, const SnippetGenerator::Options &Opts, + std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)> + RestrictDestRegs) { + assert(Instr.Operands.size() == 6 && "invalid LEA"); + assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 && + "invalid LEA"); + + constexpr const int kDestOp = 0; + constexpr const int kBaseOp = 1; + constexpr const int kIndexOp = 3; + auto PossibleDestRegs = + Instr.Operands[kDestOp].getRegisterAliasing().sourceBits(); + remove(PossibleDestRegs, ForbiddenRegisters); + auto PossibleBaseRegs = + Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits(); + remove(PossibleBaseRegs, ForbiddenRegisters); + auto PossibleIndexRegs = + Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits(); + remove(PossibleIndexRegs, ForbiddenRegisters); + + const auto &RegInfo = State.getRegInfo(); + std::vector<CodeTemplate> Result; + for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) { + for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) { + for (int LogScale = 0; LogScale <= 3; ++LogScale) { + // FIXME: Add an option for controlling how we explore immediates. + for (const int Disp : {0, 42}) { + InstructionTemplate IT(&Instr); + const int64_t Scale = 1ull << LogScale; + setMemOp(IT, 1, MCOperand::createReg(BaseReg)); + setMemOp(IT, 2, MCOperand::createImm(Scale)); + setMemOp(IT, 3, MCOperand::createReg(IndexReg)); + setMemOp(IT, 4, MCOperand::createImm(Disp)); + // SegmentReg must be 0 for LEA. + setMemOp(IT, 5, MCOperand::createReg(0)); + + // Output reg candidates are selected by the caller. 
+ auto PossibleDestRegsNow = PossibleDestRegs; + RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow); + assert(PossibleDestRegsNow.set_bits().begin() != + PossibleDestRegsNow.set_bits().end() && + "no remaining registers"); + setMemOp( + IT, 0, + MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin())); + + CodeTemplate CT; + CT.Instructions.push_back(std::move(IT)); + CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg), + RegInfo.getName(IndexReg), Scale, Disp) + .str(); + Result.push_back(std::move(CT)); + if (Result.size() >= Opts.MaxConfigsPerOpcode) + return std::move(Result); + } + } + } + } + + return std::move(Result); +} + +namespace { +class X86SerialSnippetGenerator : public SerialSnippetGenerator { +public: + using SerialSnippetGenerator::SerialSnippetGenerator; + + Expected<std::vector<CodeTemplate>> + generateCodeTemplates(InstructionTemplate Variant, + const BitVector &ForbiddenRegisters) const override; +}; +} // namespace + +Expected<std::vector<CodeTemplate>> +X86SerialSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + + if (const auto reason = isInvalidOpcode(Instr)) + return make_error<Failure>(reason); + + // LEA gets special attention. + const auto Opcode = Instr.Description.getOpcode(); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { + return generateLEATemplatesCommon( + Instr, ForbiddenRegisters, State, Opts, + [this](unsigned BaseReg, unsigned IndexReg, + BitVector &CandidateDestRegs) { + // We just select a destination register that aliases the base + // register. 
+ CandidateDestRegs &= + State.getRATC().getRegister(BaseReg).aliasedBits(); + }); + } + + if (Instr.hasMemoryOperands()) + return make_error<Failure>( + "unsupported memory operand in latency measurements"); + + switch (getX86FPFlags(Instr)) { + case X86II::NotFP: + return SerialSnippetGenerator::generateCodeTemplates(Variant, + ForbiddenRegisters); + case X86II::ZeroArgFP: + case X86II::OneArgFP: + case X86II::SpecialFP: + case X86II::CompareFP: + case X86II::CondMovFP: + return make_error<Failure>("Unsupported x87 Instruction"); + case X86II::OneArgFPRW: + case X86II::TwoArgFP: + // These are instructions like + // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) + // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) + // They are intrinsically serial and do not modify the state of the stack. + return generateSelfAliasingCodeTemplates(Variant); + default: + llvm_unreachable("Unknown FP Type!"); + } +} + +namespace { +class X86ParallelSnippetGenerator : public ParallelSnippetGenerator { +public: + using ParallelSnippetGenerator::ParallelSnippetGenerator; + + Expected<std::vector<CodeTemplate>> + generateCodeTemplates(InstructionTemplate Variant, + const BitVector &ForbiddenRegisters) const override; +}; + +} // namespace + +Expected<std::vector<CodeTemplate>> +X86ParallelSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + + if (const auto reason = isInvalidOpcode(Instr)) + return make_error<Failure>(reason); + + // LEA gets special attention. + const auto Opcode = Instr.Description.getOpcode(); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { + return generateLEATemplatesCommon( + Instr, ForbiddenRegisters, State, Opts, + [this](unsigned BaseReg, unsigned IndexReg, + BitVector &CandidateDestRegs) { + // Any destination register that is not used for addressing is fine. 
+ remove(CandidateDestRegs, + State.getRATC().getRegister(BaseReg).aliasedBits()); + remove(CandidateDestRegs, + State.getRATC().getRegister(IndexReg).aliasedBits()); + }); + } + + switch (getX86FPFlags(Instr)) { + case X86II::NotFP: + return ParallelSnippetGenerator::generateCodeTemplates(Variant, + ForbiddenRegisters); + case X86II::ZeroArgFP: + case X86II::OneArgFP: + case X86II::SpecialFP: + return make_error<Failure>("Unsupported x87 Instruction"); + case X86II::OneArgFPRW: + case X86II::TwoArgFP: + // These are instructions like + // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) + // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) + // They are intrinsically serial and do not modify the state of the stack. + // We generate the same code for latency and uops. + return generateSelfAliasingCodeTemplates(Variant); + case X86II::CompareFP: + case X86II::CondMovFP: + // We can compute uops for any FP instruction that does not grow or shrink + // the stack (either do not touch the stack or push as much as they pop). + return generateUnconstrainedCodeTemplates( + Variant, "instruction does not grow/shrink the FP stack"); + default: + llvm_unreachable("Unknown FP Type!"); + } +} + +static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) { + switch (RegBitWidth) { + case 8: + return X86::MOV8ri; + case 16: + return X86::MOV16ri; + case 32: + return X86::MOV32ri; + case 64: + return X86::MOV64ri; + } + llvm_unreachable("Invalid Value Width"); +} + +// Generates instruction to load an immediate value into a register. +static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth, + const APInt &Value) { + if (Value.getBitWidth() > RegBitWidth) + llvm_unreachable("Value must fit in the Register"); + return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth)) + .addReg(Reg) + .addImm(Value.getZExtValue()); +} + +// Allocates scratch memory on the stack. 
+static MCInst allocateStackSpace(unsigned Bytes) { + return MCInstBuilder(X86::SUB64ri8) + .addReg(X86::RSP) + .addReg(X86::RSP) + .addImm(Bytes); +} + +// Fills scratch memory at offset `OffsetBytes` with value `Imm`. +static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes, + uint64_t Imm) { + return MCInstBuilder(MovOpcode) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(OffsetBytes) // Disp + .addReg(0) // Segment + // Immediate. + .addImm(Imm); +} + +// Loads scratch memory into register `Reg` using opcode `RMOpcode`. +static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) { + return MCInstBuilder(RMOpcode) + .addReg(Reg) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(0) // Disp + .addReg(0); // Segment +} + +// Releases scratch memory. +static MCInst releaseStackSpace(unsigned Bytes) { + return MCInstBuilder(X86::ADD64ri8) + .addReg(X86::RSP) + .addReg(X86::RSP) + .addImm(Bytes); +} + +// Reserves some space on the stack, fills it with the content of the provided +// constant and provide methods to load the stack value into a register. +namespace { +struct ConstantInliner { + explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {} + + std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth, + unsigned Opcode); + + std::vector<MCInst> loadX87STAndFinalize(unsigned Reg); + + std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg); + + std::vector<MCInst> popFlagAndFinalize(); + + std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode, + unsigned Value); + +private: + ConstantInliner &add(const MCInst &Inst) { + Instructions.push_back(Inst); + return *this; + } + + void initStack(unsigned Bytes); + + static constexpr const unsigned kF80Bytes = 10; // 80 bits. 
+ + APInt Constant_; + std::vector<MCInst> Instructions; +}; +} // namespace + +std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg, + unsigned RegBitWidth, + unsigned Opcode) { + assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits"); + initStack(RegBitWidth / 8); + add(loadToReg(Reg, Opcode)); + add(releaseStackSpace(RegBitWidth / 8)); + return std::move(Instructions); +} + +std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) { + initStack(kF80Bytes); + add(MCInstBuilder(X86::LD_F80m) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(0) // Disp + .addReg(0)); // Segment + if (Reg != X86::ST0) + add(MCInstBuilder(X86::ST_Frr).addReg(Reg)); + add(releaseStackSpace(kF80Bytes)); + return std::move(Instructions); +} + +std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) { + initStack(kF80Bytes); + add(MCInstBuilder(X86::LD_Fp80m) + .addReg(Reg) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(0) // Disp + .addReg(0)); // Segment + add(releaseStackSpace(kF80Bytes)); + return std::move(Instructions); +} + +std::vector<MCInst> ConstantInliner::popFlagAndFinalize() { + initStack(8); + add(MCInstBuilder(X86::POPF64)); + return std::move(Instructions); +} + +std::vector<MCInst> +ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) { + add(allocateStackSpace(4)); + add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions + add(MCInstBuilder(Opcode) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(0) // Disp + .addReg(0)); // Segment + add(releaseStackSpace(4)); + return std::move(Instructions); +} + +void ConstantInliner::initStack(unsigned Bytes) { + assert(Constant_.getBitWidth() <= Bytes * 8 && + "Value does not have the correct size"); + const APInt WideConstant = 
Constant_.getBitWidth() < Bytes * 8 + ? Constant_.sext(Bytes * 8) + : Constant_; + add(allocateStackSpace(Bytes)); + size_t ByteOffset = 0; + for (; Bytes - ByteOffset >= 4; ByteOffset += 4) + add(fillStackSpace( + X86::MOV32mi, ByteOffset, + WideConstant.extractBits(32, ByteOffset * 8).getZExtValue())); + if (Bytes - ByteOffset >= 2) { + add(fillStackSpace( + X86::MOV16mi, ByteOffset, + WideConstant.extractBits(16, ByteOffset * 8).getZExtValue())); + ByteOffset += 2; + } + if (Bytes - ByteOffset >= 1) + add(fillStackSpace( + X86::MOV8mi, ByteOffset, + WideConstant.extractBits(8, ByteOffset * 8).getZExtValue())); +} + +#include "X86GenExegesis.inc" + +namespace { + +class X86SavedState : public ExegesisTarget::SavedState { +public: + X86SavedState() { +#ifdef __x86_64__ +# if defined(_MSC_VER) + _fxsave64(FPState); + Eflags = __readeflags(); +# elif defined(__GNUC__) + __builtin_ia32_fxsave64(FPState); + Eflags = __builtin_ia32_readeflags_u64(); +# endif +#else + llvm_unreachable("X86 exegesis running on non-X86 target"); +#endif + } + + ~X86SavedState() { + // Restoring the X87 state does not flush pending exceptions, make sure + // these exceptions are flushed now. +#ifdef __x86_64__ +# if defined(_MSC_VER) + _clearfp(); + _fxrstor64(FPState); + __writeeflags(Eflags); +# elif defined(__GNUC__) + asm volatile("fwait"); + __builtin_ia32_fxrstor64(FPState); + __builtin_ia32_writeeflags_u64(Eflags); +# endif +#else + llvm_unreachable("X86 exegesis running on non-X86 target"); +#endif + } + +private: +#ifdef __x86_64__ + alignas(16) char FPState[512]; + uint64_t Eflags; +#endif +}; + +class ExegesisX86Target : public ExegesisTarget { +public: + ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {} + + Expected<std::unique_ptr<pfm::Counter>> + createCounter(StringRef CounterName, const LLVMState &State) const override { + // If LbrSamplingPeriod was provided, then ignore the + // CounterName because we only have one for LBR. 
+ if (LbrSamplingPeriod > 0) { + // Can't use LBR without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, or without + // __linux__ (for now) +#if defined(HAVE_LIBPFM) && defined(LIBPFM_HAS_FIELD_CYCLES) && \ + defined(__linux__) + return std::make_unique<X86LbrCounter>( + X86LbrPerfEvent(LbrSamplingPeriod)); +#else + return llvm::make_error<llvm::StringError>( + "LBR counter requested without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, " + "or running on Linux.", + llvm::errc::invalid_argument); +#endif + } + return ExegesisTarget::createCounter(CounterName, State); + } + +private: + void addTargetSpecificPasses(PassManagerBase &PM) const override; + + unsigned getScratchMemoryRegister(const Triple &TT) const override; + + unsigned getLoopCounterRegister(const Triple &) const override; + + unsigned getMaxMemoryAccessSize() const override { return 64; } + + Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, + MCOperand &AssignedValue, + const BitVector &ForbiddenRegs) const override; + + void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, + unsigned Offset) const override; + + void decrementLoopCounterAndJump(MachineBasicBlock &MBB, + MachineBasicBlock &TargetMBB, + const MCInstrInfo &MII) const override; + + std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, + const APInt &Value) const override; + + ArrayRef<unsigned> getUnavailableRegisters() const override { + return makeArrayRef(kUnavailableRegisters, + sizeof(kUnavailableRegisters) / + sizeof(kUnavailableRegisters[0])); + } + + bool allowAsBackToBack(const Instruction &Instr) const override { + const unsigned Opcode = Instr.Description.Opcode; + return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r && + Opcode != X86::LEA64_32r && Opcode != X86::LEA16r; + } + + std::vector<InstructionTemplate> + generateInstructionVariants(const Instruction &Instr, + unsigned MaxConfigsPerOpcode) const override; + + std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator( + 
const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique<X86SerialSnippetGenerator>(State, Opts); + } + + std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique<X86ParallelSnippetGenerator>(State, Opts); + } + + bool matchesArch(Triple::ArchType Arch) const override { + return Arch == Triple::x86_64 || Arch == Triple::x86; + } + + Error checkFeatureSupport() const override { + // LBR is the only feature we conditionally support now. + // So if LBR is not requested, then we should be able to run the benchmarks. + if (LbrSamplingPeriod == 0) + return Error::success(); + +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + // FIXME: Fix this. + // https://bugs.llvm.org/show_bug.cgi?id=48918 + // For now, only do the check if we see an Intel machine because + // the counter uses some intel-specific magic and it could + // be confuse and think an AMD machine actually has LBR support. +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64) + using namespace sys::detail::x86; + + if (getVendorSignature() == VendorSignatures::GENUINE_INTEL) + // If the kernel supports it, the hardware still may not have it. + return X86LbrCounter::checkLbrSupport(); +#else + llvm_unreachable("Running X86 exegesis on non-X86 target"); +#endif +#endif + return llvm::make_error<llvm::StringError>( + "LBR not supported on this kernel and/or platform", + llvm::errc::not_supported); + } + + std::unique_ptr<SavedState> withSavedState() const override { + return std::make_unique<X86SavedState>(); + } + + static const unsigned kUnavailableRegisters[4]; +}; + +// We disable a few registers that cannot be encoded on instructions with a REX +// prefix. 
+const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH, + X86::CH, X86::DH}; + +// We're using one of R8-R15 because these registers are never hardcoded in +// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less +// conflicts. +constexpr const unsigned kLoopCounterReg = X86::R8; + +} // namespace + +void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const { + // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. + PM.add(createX86FloatingPointStackifierPass()); +} + +unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const { + if (!TT.isArch64Bit()) { + // FIXME: This would require popping from the stack, so we would have to + // add some additional setup code. + return 0; + } + return TT.isOSWindows() ? X86::RCX : X86::RDI; +} + +unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const { + if (!TT.isArch64Bit()) { + return 0; + } + return kLoopCounterReg; +} + +Error ExegesisX86Target::randomizeTargetMCOperand( + const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, + const BitVector &ForbiddenRegs) const { + const Operand &Op = Instr.getPrimaryOperand(Var); + switch (Op.getExplicitOperandInfo().OperandType) { + case X86::OperandType::OPERAND_ROUNDING_CONTROL: + AssignedValue = + MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO)); + return Error::success(); + default: + break; + } + return make_error<Failure>( + Twine("unimplemented operand type ") + .concat(Twine(Op.getExplicitOperandInfo().OperandType))); +} + +void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT, + unsigned Reg, + unsigned Offset) const { + assert(!isInvalidMemoryInstr(IT.getInstr()) && + "fillMemoryOperands requires a valid memory instruction"); + int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags); + assert(MemOpIdx >= 0 && "invalid memory operand index"); + // getMemoryOperandNo() ignores tied operands, so we have to add 
them back. + MemOpIdx += X86II::getOperandBias(IT.getInstr().Description); + setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg + setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt + setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg + setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp + setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0)); // Segment +} + +void ExegesisX86Target::decrementLoopCounterAndJump( + MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, + const MCInstrInfo &MII) const { + BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8)) + .addDef(kLoopCounterReg) + .addUse(kLoopCounterReg) + .addImm(-1); + BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1)) + .addMBB(&TargetMBB) + .addImm(X86::COND_NE); +} + +std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, + unsigned Reg, + const APInt &Value) const { + if (X86::GR8RegClass.contains(Reg)) + return {loadImmediate(Reg, 8, Value)}; + if (X86::GR16RegClass.contains(Reg)) + return {loadImmediate(Reg, 16, Value)}; + if (X86::GR32RegClass.contains(Reg)) + return {loadImmediate(Reg, 32, Value)}; + if (X86::GR64RegClass.contains(Reg)) + return {loadImmediate(Reg, 64, Value)}; + ConstantInliner CI(Value); + if (X86::VR64RegClass.contains(Reg)) + return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm); + if (X86::VR128XRegClass.contains(Reg)) { + if (STI.getFeatureBits()[X86::FeatureAVX512]) + return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm); + if (STI.getFeatureBits()[X86::FeatureAVX]) + return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm); + return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm); + } + if (X86::VR256XRegClass.contains(Reg)) { + if (STI.getFeatureBits()[X86::FeatureAVX512]) + return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm); + if (STI.getFeatureBits()[X86::FeatureAVX]) + return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm); + } + if (X86::VR512RegClass.contains(Reg)) + if 
(STI.getFeatureBits()[X86::FeatureAVX512]) + return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm); + if (X86::RSTRegClass.contains(Reg)) { + return CI.loadX87STAndFinalize(Reg); + } + if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) || + X86::RFP80RegClass.contains(Reg)) { + return CI.loadX87FPAndFinalize(Reg); + } + if (Reg == X86::EFLAGS) + return CI.popFlagAndFinalize(); + if (Reg == X86::MXCSR) + return CI.loadImplicitRegAndFinalize( + STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR, + 0x1f80); + if (Reg == X86::FPCW) + return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f); + return {}; // Not yet implemented. +} + +// Instruction can have some variable operands, and we may want to see how +// different operands affect performance. So for each operand position, +// precompute all the possible choices we might care about, +// and greedily generate all the possible combinations of choices. +std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants( + const Instruction &Instr, unsigned MaxConfigsPerOpcode) const { + bool Exploration = false; + SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices; + VariableChoices.resize(Instr.Variables.size()); + for (auto I : llvm::zip(Instr.Variables, VariableChoices)) { + const Variable &Var = std::get<0>(I); + SmallVectorImpl<MCOperand> &Choices = std::get<1>(I); + + switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) { + default: + // We don't wish to explicitly explore this variable. + Choices.emplace_back(); // But add invalid MCOperand to simplify logic. 
+ continue; + case X86::OperandType::OPERAND_COND_CODE: { + Exploration = true; + auto CondCodes = enum_seq_inclusive(X86::CondCode::COND_O, + X86::CondCode::LAST_VALID_COND, + force_iteration_on_noniterable_enum); + Choices.reserve(CondCodes.size()); + for (int CondCode : CondCodes) + Choices.emplace_back(MCOperand::createImm(CondCode)); + break; + } + } + } + + // If we don't wish to explore any variables, defer to the baseline method. + if (!Exploration) + return ExegesisTarget::generateInstructionVariants(Instr, + MaxConfigsPerOpcode); + + std::vector<InstructionTemplate> Variants; + size_t NumVariants; + CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G( + VariableChoices); + + // How many operand combinations can we produce, within the limit? + NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode); + // And actually produce all the wanted operand combinations. + Variants.reserve(NumVariants); + G.generate([&](ArrayRef<MCOperand> State) -> bool { + Variants.emplace_back(&Instr); + Variants.back().setVariableValues(State); + // Did we run out of space for variants? 
+ return Variants.size() >= NumVariants; + }); + + assert(Variants.size() == NumVariants && + Variants.size() <= MaxConfigsPerOpcode && + "Should not produce too many variants"); + return Variants; +} + +static ExegesisTarget *getTheExegesisX86Target() { + static ExegesisX86Target Target; + return &Target; +} + +void InitializeX86ExegesisTarget() { + ExegesisTarget::registerTarget(getTheExegesisX86Target()); +} + +} // namespace exegesis +} // namespace llvm diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.cpp new file mode 100644 index 0000000000..a91a2e8ac8 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -0,0 +1,261 @@ +//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "X86Counter.h" + +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + +// FIXME: Use appropriate wrappers for poll.h and mman.h +// to support Windows and remove this linux-only guard. + +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" + +#error #include <perfmon/perf_event.h> +#error #include <perfmon/pfmlib.h> +#error #include <perfmon/pfmlib_perf_event.h> + +#include <atomic> +#include <chrono> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <memory> +#include <vector> + +#include <poll.h> +#include <sys/mman.h> +#include <unistd.h> + +namespace llvm { +namespace exegesis { + +// Number of entries in the LBR. 
+static constexpr int kLbrEntries = 16; +static constexpr size_t kBufferPages = 8; +static const size_t kDataBufferSize = kBufferPages * getpagesize(); + +// First page is reserved for perf_event_mmap_page. Data buffer starts on +// the next page, so we allocate one more page. +static const size_t kMappedBufferSize = (kBufferPages + 1) * getpagesize(); + +// Waits for the LBR perf events. +static int pollLbrPerfEvent(const int FileDescriptor) { + struct pollfd PollFd; + PollFd.fd = FileDescriptor; + PollFd.events = POLLIN; + PollFd.revents = 0; + return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */); +} + +// Copies the data-buffer into Buf, given the pointer to MMapped. +static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail, + size_t DataSize) { + // First page is reserved for perf_event_mmap_page. Data buffer starts on + // the next page. + char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize(); + // The LBR buffer is a cyclic buffer, we copy data to another buffer. + uint64_t Offset = Tail % kDataBufferSize; + size_t CopySize = kDataBufferSize - Offset; + memcpy(Buf, Start + Offset, CopySize); + if (CopySize >= DataSize) + return; + + memcpy(Buf + CopySize, Start, Offset); + return; +} + +// Parses the given data-buffer for stats and fill the CycleArray. +// If data has been extracted successfully, also modifies the code to jump +// out the benchmark loop. +static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, + const void *From, const void *To, + llvm::SmallVector<int64_t, 4> *CycleArray) { + const char *DataPtr = DataBuf; + while (DataPtr < DataBuf + DataSize) { + struct perf_event_header Header; + memcpy(&Header, DataPtr, sizeof(struct perf_event_header)); + if (Header.type != PERF_RECORD_SAMPLE) { + // Ignores non-sample records. 
+ DataPtr += Header.size; + continue; + } + DataPtr += sizeof(Header); + uint64_t Count = llvm::support::endian::read64(DataPtr, support::native); + DataPtr += sizeof(Count); + + struct perf_branch_entry Entry; + memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry)); + + // Read the perf_branch_entry array. + for (uint64_t i = 0; i < Count; ++i) { + const uint64_t BlockStart = From == nullptr + ? std::numeric_limits<uint64_t>::min() + : reinterpret_cast<uint64_t>(From); + const uint64_t BlockEnd = To == nullptr + ? std::numeric_limits<uint64_t>::max() + : reinterpret_cast<uint64_t>(To); + + if (BlockStart <= Entry.from && BlockEnd >= Entry.to) + CycleArray->push_back(Entry.cycles); + + if (i == Count - 1) + // We've reached the last entry. + return llvm::Error::success(); + + // Advance to next entry + DataPtr += sizeof(Entry); + memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry)); + } + } + return llvm::make_error<llvm::StringError>("Unable to parse databuffer.", + llvm::errc::io_error); +} + +X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) { + assert(SamplingPeriod > 0 && "SamplingPeriod must be positive"); + EventString = "BR_INST_RETIRED.NEAR_TAKEN"; + Attr = new perf_event_attr(); + Attr->size = sizeof(*Attr); + Attr->type = PERF_TYPE_RAW; + // FIXME This is SKL's encoding. Not sure if it'll change. + Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN + Attr->sample_type = PERF_SAMPLE_BRANCH_STACK; + // Don't need to specify "USER" because we've already excluded HV and Kernel. + Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + Attr->sample_period = SamplingPeriod; + Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH. 
+ Attr->disabled = 1; + Attr->exclude_kernel = 1; + Attr->exclude_hv = 1; + Attr->read_format = PERF_FORMAT_GROUP; + + FullQualifiedEventString = EventString; +} + +X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent) + : Counter(std::move(NewEvent)) { + MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE, + MAP_SHARED, FileDescriptor, 0); + if (MMappedBuffer == MAP_FAILED) + llvm::errs() << "Failed to mmap buffer."; +} + +X86LbrCounter::~X86LbrCounter() { + if (0 != munmap(MMappedBuffer, kMappedBufferSize)) + llvm::errs() << "Failed to munmap buffer."; +} + +void X86LbrCounter::start() { + ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */); +} + +llvm::Error X86LbrCounter::checkLbrSupport() { + // Do a sample read and check if the results contain non-zero values. + + X86LbrCounter counter(X86LbrPerfEvent(123)); + counter.start(); + + // Prevent the compiler from unrolling the loop and get rid of all the + // branches. We need at least 16 iterations. + int Sum = 0; + int V = 1; + + volatile int *P = &V; + auto TimeLimit = + std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5); + + for (int I = 0; + I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit; + ++I) { + Sum += *P; + } + + counter.stop(); + (void)Sum; + + auto ResultOrError = counter.doReadCounter(nullptr, nullptr); + if (ResultOrError) + if (!ResultOrError.get().empty()) + // If there is at least one non-zero entry, then LBR is supported. 
+ for (const int64_t &Value : ResultOrError.get()) + if (Value != 0) + return Error::success(); + + return llvm::make_error<llvm::StringError>( + "LBR format with cycles is not suppported on the host.", + llvm::errc::not_supported); +} + +llvm::Expected<llvm::SmallVector<int64_t, 4>> +X86LbrCounter::readOrError(StringRef FunctionBytes) const { + // Disable the event before reading + ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); + + // Find the boundary of the function so that we could filter the LBRs + // to keep only the relevant records. + if (FunctionBytes.empty()) + return llvm::make_error<llvm::StringError>("Empty function bytes", + llvm::errc::invalid_argument); + const void *From = reinterpret_cast<const void *>(FunctionBytes.data()); + const void *To = reinterpret_cast<const void *>(FunctionBytes.data() + + FunctionBytes.size()); + return doReadCounter(From, To); +} + +llvm::Expected<llvm::SmallVector<int64_t, 4>> +X86LbrCounter::doReadCounter(const void *From, const void *To) const { + // The max number of time-outs/retries before we give up. + static constexpr int kMaxTimeouts = 160; + + // Parses the LBR buffer and fills CycleArray with the sequence of cycle + // counts from the buffer. 
+ llvm::SmallVector<int64_t, 4> CycleArray; + auto DataBuf = std::make_unique<char[]>(kDataBufferSize); + int NumTimeouts = 0; + int PollResult = 0; + + while (PollResult <= 0) { + PollResult = pollLbrPerfEvent(FileDescriptor); + if (PollResult > 0) + break; + if (PollResult == -1) + return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.", + llvm::errc::io_error); + if (NumTimeouts++ >= kMaxTimeouts) + return llvm::make_error<llvm::StringError>( + "LBR polling still timed out after max number of attempts.", + llvm::errc::device_or_resource_busy); + } + + struct perf_event_mmap_page Page; + memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page)); + + const uint64_t DataTail = Page.data_tail; + const uint64_t DataHead = Page.data_head; + // We're supposed to use a barrier after reading data_head. + std::atomic_thread_fence(std::memory_order_acq_rel); + const size_t DataSize = DataHead - DataTail; + if (DataSize > kDataBufferSize) + return llvm::make_error<llvm::StringError>( + "DataSize larger than buffer size.", llvm::errc::invalid_argument); + + copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize); + llvm::Error error = + parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray); + if (!error) + return CycleArray; + return std::move(error); +} + +} // namespace exegesis +} // namespace llvm + +#endif // defined(__linux__) && defined(HAVE_LIBPFM) && + // defined(LIBPFM_HAS_FIELD_CYCLES) diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.h b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.h new file mode 100644 index 0000000000..73e4dc5b99 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/X86Counter.h @@ -0,0 +1,60 @@ +//===-- X86Counter.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Perf counter that reads the LBRs for measuring the benchmarked block's +/// throughput. +/// +/// More info at: https://lwn.net/Articles/680985 +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H + +#include "../PerfHelper.h" +#include "llvm/Support/Error.h" + +// FIXME: Use appropriate wrappers for poll.h and mman.h +// to support Windows and remove this linux-only guard. +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + +namespace llvm { +namespace exegesis { + +class X86LbrPerfEvent : public pfm::PerfEvent { +public: + X86LbrPerfEvent(unsigned SamplingPeriod); +}; + +class X86LbrCounter : public pfm::Counter { +public: + static llvm::Error checkLbrSupport(); + + explicit X86LbrCounter(pfm::PerfEvent &&Event); + + virtual ~X86LbrCounter(); + + void start() override; + + llvm::Expected<llvm::SmallVector<int64_t, 4>> + readOrError(StringRef FunctionBytes) const override; + +private: + llvm::Expected<llvm::SmallVector<int64_t, 4>> + doReadCounter(const void *From, const void *To) const; + + void *MMappedBuffer = nullptr; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // defined(__linux__) && defined(HAVE_LIBPFM) && + // defined(LIBPFM_HAS_FIELD_CYCLES) + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/ya.make b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/ya.make new file mode 100644 index 0000000000..de6d676424 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/X86/ya.make @@ -0,0 +1,38 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/include + contrib/libs/llvm14/lib/CodeGen + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/Support + contrib/libs/llvm14/lib/Target/X86 + contrib/libs/llvm14/lib/Target/X86/AsmParser + contrib/libs/llvm14/lib/Target/X86/Disassembler + contrib/libs/llvm14/lib/Target/X86/MCTargetDesc + contrib/libs/llvm14/lib/Target/X86/TargetInfo + contrib/libs/llvm14/tools/llvm-exegesis/lib +) + +ADDINCL( + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm14/lib/Target/X86 + contrib/libs/llvm14/lib/Target/X86 + contrib/libs/llvm14/tools/llvm-exegesis/lib/X86 +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Target.cpp + X86Counter.cpp +) + +END() diff --git a/contrib/libs/llvm14/tools/llvm-exegesis/lib/ya.make b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ya.make new file mode 100644 index 0000000000..18733ffb03 --- /dev/null +++ b/contrib/libs/llvm14/tools/llvm-exegesis/lib/ya.make @@ -0,0 +1,59 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/include + contrib/libs/llvm14/lib/Analysis + contrib/libs/llvm14/lib/CodeGen + contrib/libs/llvm14/lib/CodeGen/GlobalISel + contrib/libs/llvm14/lib/ExecutionEngine + contrib/libs/llvm14/lib/ExecutionEngine/MCJIT + contrib/libs/llvm14/lib/ExecutionEngine/RuntimeDyld + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/MC + contrib/libs/llvm14/lib/MC/MCDisassembler + contrib/libs/llvm14/lib/MC/MCParser + contrib/libs/llvm14/lib/Object + contrib/libs/llvm14/lib/ObjectYAML + contrib/libs/llvm14/lib/Support +) + +ADDINCL( + contrib/libs/llvm14/tools/llvm-exegesis/lib +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Analysis.cpp + Assembler.cpp + BenchmarkResult.cpp + BenchmarkRunner.cpp + Clustering.cpp + CodeTemplate.cpp + Error.cpp + LatencyBenchmarkRunner.cpp + LlvmState.cpp + MCInstrDescView.cpp + ParallelSnippetGenerator.cpp + PerfHelper.cpp + RegisterAliasing.cpp + RegisterValue.cpp + SchedClassResolution.cpp + SerialSnippetGenerator.cpp + SnippetFile.cpp + SnippetGenerator.cpp + SnippetRepetitor.cpp + Target.cpp + UopsBenchmarkRunner.cpp +) + +END() |