author    vitalyisaev <vitalyisaev@yandex-team.com> 2023-06-29 10:00:50 +0300
committer vitalyisaev <vitalyisaev@yandex-team.com> 2023-06-29 10:00:50 +0300
commit    6ffe9e53658409f212834330e13564e4952558f6 (patch)
tree      85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/tools/llvm-profgen
parent    726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff)
download  ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz
YQ Connector: support managed ClickHouse
From dqrun, one can now reach a connector instance running in the streaming test environment and extract data from a cloud ClickHouse instance.
Diffstat (limited to 'contrib/libs/llvm14/tools/llvm-profgen')
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.cpp      | 285
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.h        |  95
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/CallContext.h         |  59
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ErrorHandling.h       |  56
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/PerfReader.cpp        | 1222
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/PerfReader.h          | 728
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.cpp  | 979
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.h    | 312
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.cpp    | 790
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.h      | 541
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/llvm-profgen.cpp      | 164
-rw-r--r-- contrib/libs/llvm14/tools/llvm-profgen/ya.make               |  80
12 files changed, 5311 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.cpp b/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.cpp
new file mode 100644
index 0000000000..1e64263990
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.cpp
@@ -0,0 +1,285 @@
+//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSPreInliner.h"
+#include "ProfiledBinary.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <cstdint>
+#include <queue>
+
+#define DEBUG_TYPE "cs-preinliner"
+
+using namespace llvm;
+using namespace sampleprof;
+
+STATISTIC(PreInlNumCSInlined,
+ "Number of functions inlined with context sensitive profile");
+STATISTIC(PreInlNumCSNotInlined,
+ "Number of functions not inlined with context sensitive profile");
+STATISTIC(PreInlNumCSInlinedHitMinLimit,
+ "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(PreInlNumCSInlinedHitMaxLimit,
+ "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+ PreInlNumCSInlinedHitGrowthLimit,
+ "Number of functions with FDO inline stopped due to growth size limit");
+
+// The switches specify inline thresholds used in SampleProfileLoader inlining.
+// TODO: the actual thresholds need to be tuned here because the size here is
+// based on machine code, not LLVM IR.
+extern cl::opt<int> SampleHotCallSiteThreshold;
+extern cl::opt<int> SampleColdCallSiteThreshold;
+extern cl::opt<int> ProfileInlineGrowthLimit;
+extern cl::opt<int> ProfileInlineLimitMin;
+extern cl::opt<int> ProfileInlineLimitMax;
+extern cl::opt<bool> SortProfiledSCC;
+
+cl::opt<bool> EnableCSPreInliner(
+ "csspgo-preinliner", cl::Hidden, cl::init(true),
+ cl::desc("Run a global pre-inliner to merge context profile based on "
+ "estimated global top-down inline decisions"));
+
+cl::opt<bool> UseContextCostForPreInliner(
+ "use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
+ cl::desc("Use context-sensitive byte size cost for preinliner decisions"));
+
+static cl::opt<bool> SamplePreInlineReplay(
+ "csspgo-replay-preinline", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Replay previous inlining and adjust context profile accordingly"));
+
+CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
+ uint64_t HotThreshold, uint64_t ColdThreshold)
+ : UseContextCost(UseContextCostForPreInliner),
+ // TODO: Pass in a guid-to-name map in order for
+ // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes
+ // as their profile context.
+ ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary),
+ HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {
+  // Set default preinliner hot/cold call site thresholds tuned with CSSPGO
+  // for good performance with reasonable profile size.
+ if (!SampleHotCallSiteThreshold.getNumOccurrences())
+ SampleHotCallSiteThreshold = 1500;
+ if (!SampleColdCallSiteThreshold.getNumOccurrences())
+ SampleColdCallSiteThreshold = 0;
+}
+
+std::vector<StringRef> CSPreInliner::buildTopDownOrder() {
+ std::vector<StringRef> Order;
+ ProfiledCallGraph ProfiledCG(ContextTracker);
+
+  // Now that we have a profiled call graph, construct the top-down order
+  // by building up SCCs and reversing the SCC order.
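+  // For instance, given an illustrative call graph main -> foo -> bar,
+  // scc_begin visits SCCs bottom-up (bar, foo, main), so the reversal below
+  // yields the top-down order (main, foo, bar).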
+ scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
+ while (!I.isAtEnd()) {
+ auto Range = *I;
+ if (SortProfiledSCC) {
+ // Sort nodes in one SCC based on callsite hotness.
+ scc_member_iterator<ProfiledCallGraph *> SI(*I);
+ Range = *SI;
+ }
+ for (auto *Node : Range) {
+ if (Node != ProfiledCG.getEntryNode())
+ Order.push_back(Node->Name);
+ }
+ ++I;
+ }
+ std::reverse(Order.begin(), Order.end());
+
+ return Order;
+}
+
+bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
+ const FunctionSamples *CallerSamples) {
+ assert(CallerSamples && "Expect non-null caller samples");
+
+  // Ideally we want to consider everything a function calls, but as far as
+  // the context profile is concerned, only those frames that are children of
+  // the current one in the trie are relevant. So we walk the trie instead of
+  // call targets from the function profile.
+ ContextTrieNode *CallerNode =
+ ContextTracker.getContextFor(CallerSamples->getContext());
+
+ bool HasNewCandidate = false;
+ for (auto &Child : CallerNode->getAllChildContext()) {
+ ContextTrieNode *CalleeNode = &Child.second;
+ FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples();
+ if (!CalleeSamples)
+ continue;
+
+    // The call site count is more reliable, so we look up the corresponding
+    // call target profile in the caller's context profile to retrieve the
+    // call site count.
+ uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
+ uint64_t CallsiteCount = 0;
+ LineLocation Callsite = CalleeNode->getCallSiteLoc();
+ if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
+ SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
+ auto It = TargetCounts.find(CalleeSamples->getName());
+ if (It != TargetCounts.end())
+ CallsiteCount = It->second;
+ }
+
+    // TODO: call site and callee entry counts should be mostly consistent;
+    // add a check for that.
+ HasNewCandidate = true;
+ uint32_t CalleeSize = getFuncSize(*CalleeSamples);
+ CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
+ CalleeSize);
+ }
+
+ return HasNewCandidate;
+}
+
+uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) {
+ if (UseContextCost) {
+ return Binary.getFuncSizeForContext(FSamples.getContext());
+ }
+
+ return FSamples.getBodySamples().size();
+}
+
+bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
+ // If replay inline is requested, simply follow the inline decision of the
+ // profiled binary.
+ if (SamplePreInlineReplay)
+ return Candidate.CalleeSamples->getContext().hasAttribute(
+ ContextWasInlined);
+
+  // Adjust the threshold based on call site hotness; only do this for the
+  // callsite-prioritized inliner because otherwise the cost-benefit check is
+  // done earlier.
+ unsigned int SampleThreshold = SampleColdCallSiteThreshold;
+ if (Candidate.CallsiteCount > HotCountThreshold)
+ SampleThreshold = SampleHotCallSiteThreshold;
+
+  // TODO: for small cold functions, we may still inline them, and we need to
+  // keep the context profile accordingly.
+ if (Candidate.CallsiteCount < ColdCountThreshold)
+ SampleThreshold = SampleColdCallSiteThreshold;
+
+ return (Candidate.SizeCost < SampleThreshold);
+}
+
+void CSPreInliner::processFunction(const StringRef Name) {
+ FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
+ if (!FSamples)
+ return;
+
+ unsigned FuncSize = getFuncSize(*FSamples);
+ unsigned FuncFinalSize = FuncSize;
+ unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
+ SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+ SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
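+  // Illustration: a function with pre-inline size 200 and a growth limit of
+  // 12 gets a raw limit of 200 * 12 = 2400, which is then clamped into
+  // [ProfileInlineLimitMin, ProfileInlineLimitMax].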
+
+ LLVM_DEBUG(dbgs() << "Process " << Name
+ << " for context-sensitive pre-inlining (pre-inline size: "
+ << FuncSize << ", size limit: " << SizeLimit << ")\n");
+
+ ProfiledCandidateQueue CQueue;
+ getInlineCandidates(CQueue, FSamples);
+
+ while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
+ ProfiledInlineCandidate Candidate = CQueue.top();
+ CQueue.pop();
+ bool ShouldInline = false;
+ if ((ShouldInline = shouldInline(Candidate))) {
+      // Mark the context as inlined, as the corresponding context profile
+      // won't be merged into that function's base profile.
+ ++PreInlNumCSInlined;
+ ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
+ Candidate.CalleeSamples->getContext().setAttribute(
+ ContextShouldBeInlined);
+ FuncFinalSize += Candidate.SizeCost;
+ getInlineCandidates(CQueue, Candidate.CalleeSamples);
+ } else {
+ ++PreInlNumCSNotInlined;
+ }
+ LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined")
+ << " context profile for: "
+ << Candidate.CalleeSamples->getContext().toString()
+ << " (callee size: " << Candidate.SizeCost
+ << ", call count:" << Candidate.CallsiteCount << ")\n");
+ }
+
+ if (!CQueue.empty()) {
+ if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+ ++PreInlNumCSInlinedHitMaxLimit;
+ else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+ ++PreInlNumCSInlinedHitMinLimit;
+ else
+ ++PreInlNumCSInlinedHitGrowthLimit;
+ }
+
+ LLVM_DEBUG({
+ if (!CQueue.empty())
+ dbgs() << " Inline candidates ignored due to size limit (inliner "
+ "original size: "
+ << FuncSize << ", inliner final size: " << FuncFinalSize
+ << ", size limit: " << SizeLimit << ")\n";
+
+ while (!CQueue.empty()) {
+ ProfiledInlineCandidate Candidate = CQueue.top();
+ CQueue.pop();
+ bool WasInlined =
+ Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
+ dbgs() << " " << Candidate.CalleeSamples->getContext().toString()
+ << " (candidate size:" << Candidate.SizeCost
+ << ", call count: " << Candidate.CallsiteCount << ", previously "
+ << (WasInlined ? "inlined)\n" : "not inlined)\n");
+ }
+ });
+}
+
+void CSPreInliner::run() {
+#ifndef NDEBUG
+ auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) {
+ dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
+ << Profiles.size() << " total):\n";
+ for (auto &It : Profiles) {
+ const FunctionSamples &Samples = It.second;
+ dbgs() << " [" << Samples.getContext().toString() << "] "
+ << Samples.getTotalSamples() << ":" << Samples.getHeadSamples()
+ << "\n";
+ }
+ };
+#endif
+
+ LLVM_DEBUG(printProfileNames(ProfileMap, true));
+
+  // Execute the global pre-inliner to estimate a global top-down inline
+  // decision and merge profiles accordingly. This helps with profile merging
+  // for ThinLTO, where we otherwise wouldn't be able to merge profiles back
+  // into base profiles across module/thin-backend boundaries.
+  // It also helps compress the context profile to control profile size, as
+  // we now only need context profiles for functions that are going to be
+  // inlined.
+ for (StringRef FuncName : buildTopDownOrder()) {
+ processFunction(FuncName);
+ }
+
+  // Context profiles that are not inlined are merged into their base
+  // profiles, so we can trim such profiles from the output.
+ std::vector<SampleContext> ProfilesToBeRemoved;
+ for (auto &It : ProfileMap) {
+ SampleContext &Context = It.second.getContext();
+ if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) {
+ assert(Context.hasState(MergedContext) &&
+ "Not inlined context profile should be merged already");
+ ProfilesToBeRemoved.push_back(It.first);
+ }
+ }
+
+ for (auto &ContextName : ProfilesToBeRemoved) {
+ ProfileMap.erase(ContextName);
+ }
+
+ // Make sure ProfileMap's key is consistent with FunctionSamples' name.
+ SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles();
+
+ LLVM_DEBUG(printProfileNames(ProfileMap, false));
+}
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.h b/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.h
new file mode 100644
index 0000000000..9f63f7ef7b
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/CSPreInliner.h
@@ -0,0 +1,95 @@
+//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
+
+#include "ProfiledBinary.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+namespace sampleprof {
+
+// Inline candidate seen from profile
+struct ProfiledInlineCandidate {
+ ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count,
+ uint32_t Size)
+ : CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {}
+ // Context-sensitive function profile for inline candidate
+ const FunctionSamples *CalleeSamples;
+ // Call site count for an inline candidate
+ // TODO: make sure entry count for context profile and call site
+ // target count for corresponding call are consistent.
+ uint64_t CallsiteCount;
+ // Size proxy for function under particular call context.
+ uint64_t SizeCost;
+};
+
+// Inline candidate comparer using call site weight
+struct ProfiledCandidateComparer {
+ bool operator()(const ProfiledInlineCandidate &LHS,
+ const ProfiledInlineCandidate &RHS) {
+ if (LHS.CallsiteCount != RHS.CallsiteCount)
+ return LHS.CallsiteCount < RHS.CallsiteCount;
+
+ if (LHS.SizeCost != RHS.SizeCost)
+ return LHS.SizeCost > RHS.SizeCost;
+
+ // Tie breaker using GUID so we have stable/deterministic inlining order
+ assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+ "Expect non-null FunctionSamples");
+ return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
+ RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
+ }
+};
+
+using ProfiledCandidateQueue =
+ PriorityQueue<ProfiledInlineCandidate, std::vector<ProfiledInlineCandidate>,
+ ProfiledCandidateComparer>;
+
+// Pre-compilation inliner based on context-sensitive profile.
+// The PreInliner estimates inline decisions using hotness from the profile
+// and cost estimation from machine code size. It helps merge context
+// profiles globally and achieves better post-inline profile quality, which
+// otherwise wouldn't be possible for ThinLTO. It also reduces context profile
+// size by only keeping contexts that are estimated to be inlined.
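+// A minimal usage sketch (assuming a populated profile map and binary):
+//   CSPreInliner(ProfileMap, Binary, HotThreshold, ColdThreshold).run();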
+class CSPreInliner {
+public:
+ CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
+ uint64_t HotThreshold, uint64_t ColdThreshold);
+ void run();
+
+private:
+ bool getInlineCandidates(ProfiledCandidateQueue &CQueue,
+ const FunctionSamples *FCallerContextSamples);
+ std::vector<StringRef> buildTopDownOrder();
+ void processFunction(StringRef Name);
+ bool shouldInline(ProfiledInlineCandidate &Candidate);
+ uint32_t getFuncSize(const FunctionSamples &FSamples);
+ bool UseContextCost;
+ SampleContextTracker ContextTracker;
+ SampleProfileMap &ProfileMap;
+ ProfiledBinary &Binary;
+
+ // Count thresholds to answer isHotCount and isColdCount queries.
+ // Mirrors the threshold in ProfileSummaryInfo.
+ uint64_t HotCountThreshold;
+ uint64_t ColdCountThreshold;
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/CallContext.h b/contrib/libs/llvm14/tools/llvm-profgen/CallContext.h
new file mode 100644
index 0000000000..5e552130d0
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/CallContext.h
@@ -0,0 +1,59 @@
+//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
+#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
+
+#include "llvm/ProfileData/SampleProf.h"
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace sampleprof {
+
+inline std::string getCallSite(const SampleContextFrame &Callsite) {
+ std::string CallsiteStr = Callsite.FuncName.str();
+ CallsiteStr += ":";
+ CallsiteStr += Twine(Callsite.Location.LineOffset).str();
+ if (Callsite.Location.Discriminator > 0) {
+ CallsiteStr += ".";
+ CallsiteStr += Twine(Callsite.Location.Discriminator).str();
+ }
+ return CallsiteStr;
+}
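+
+// Example (illustrative): a frame with FuncName "foo", LineOffset 2 and
+// Discriminator 1 renders as "foo:2.1"; getLocWithContext below joins such
+// frames with " @ ", e.g. "main:3 @ foo:2.1".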
+
+// TODO: This operation is expensive. If it ever gets called multiple times we
+// may think of making a class wrapper with internal states for it.
+inline std::string getLocWithContext(const SampleContextFrameVector &Context) {
+ std::ostringstream OContextStr;
+ for (const auto &Callsite : Context) {
+ if (OContextStr.str().size())
+ OContextStr << " @ ";
+ OContextStr << getCallSite(Callsite);
+ }
+ return OContextStr.str();
+}
+
+// The reversed call context, i.e., in the order of callee frames to caller
+// frames, is useful during instruction printing or pseudo probe printing.
+inline std::string
+getReversedLocWithContext(const SampleContextFrameVector &Context) {
+ std::ostringstream OContextStr;
+ for (const auto &Callsite : reverse(Context)) {
+ if (OContextStr.str().size())
+ OContextStr << " @ ";
+ OContextStr << getCallSite(Callsite);
+ }
+ return OContextStr.str();
+}
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ErrorHandling.h b/contrib/libs/llvm14/tools/llvm-profgen/ErrorHandling.h
new file mode 100644
index 0000000000..b797add8a8
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ErrorHandling.h
@@ -0,0 +1,56 @@
+//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/WithColor.h"
+#include <system_error>
+
+using namespace llvm;
+
+[[noreturn]] inline void exitWithError(const Twine &Message,
+ StringRef Whence = StringRef(),
+ StringRef Hint = StringRef()) {
+ WithColor::error(errs(), "llvm-profgen");
+ if (!Whence.empty())
+ errs() << Whence.str() << ": ";
+ errs() << Message << "\n";
+ if (!Hint.empty())
+ WithColor::note() << Hint.str() << "\n";
+ ::exit(EXIT_FAILURE);
+}
+
+[[noreturn]] inline void exitWithError(std::error_code EC,
+ StringRef Whence = StringRef()) {
+ exitWithError(EC.message(), Whence);
+}
+
+[[noreturn]] inline void exitWithError(Error E, StringRef Whence) {
+ exitWithError(errorToErrorCode(std::move(E)), Whence);
+}
+
+template <typename T, typename... Ts>
+T unwrapOrError(Expected<T> EO, Ts &&... Args) {
+ if (EO)
+ return std::move(*EO);
+ exitWithError(EO.takeError(), std::forward<Ts>(Args)...);
+}
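+
+// Example usage (illustrative), exiting with the file name as context:
+//   auto Obj = unwrapOrError(object::createBinary(Path), Path);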
+
+inline void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg) {
+ if (!Total || !Num)
+ return;
+ WithColor::warning() << format("%.2f", static_cast<double>(Num) * 100 / Total)
+ << "%(" << Num << "/" << Total << ") " << Msg << "\n";
+}
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.cpp b/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.cpp
new file mode 100644
index 0000000000..98b4c7cdf1
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.cpp
@@ -0,0 +1,1222 @@
+//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "PerfReader.h"
+#include "ProfileGenerator.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Process.h"
+
+#define DEBUG_TYPE "perf-reader"
+
+cl::opt<bool> SkipSymbolization("skip-symbolization", cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("Dump the unsymbolized profile to the "
+ "output file. It will show unwinder "
+ "output for CS profile generation."));
+
+static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("Print binary load events."));
+
+static cl::opt<bool>
+ UseOffset("use-offset", cl::init(true), cl::ZeroOrMore,
+ cl::desc("Work with `--skip-symbolization` or "
+ "`--unsymbolized-profile` to write/read the "
+ "offset instead of virtual address."));
+
+static cl::opt<bool> UseLoadableSegmentAsBase(
+ "use-first-loadable-segment-as-base", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Use first loadable segment address as base address "
+ "for offsets in unsymbolized profile. By default "
+ "first executable segment address is used"));
+
+static cl::opt<bool>
+ IgnoreStackSamples("ignore-stack-samples", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Ignore call stack samples for hybrid samples "
+ "and produce context-insensitive profile."));
+cl::opt<bool> ShowDetailedWarning("show-detailed-warning", cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("Show detailed warning message."));
+
+extern cl::opt<std::string> PerfTraceFilename;
+extern cl::opt<bool> ShowDisassemblyOnly;
+extern cl::opt<bool> ShowSourceLocations;
+extern cl::opt<std::string> OutputFilename;
+
+namespace llvm {
+namespace sampleprof {
+
+void VirtualUnwinder::unwindCall(UnwindState &State) {
+ uint64_t Source = State.getCurrentLBRSource();
+  // An artificial return should have pushed an external frame; an artificial
+  // call will match it and pop the external frame so that the contexts before
+  // and after the external call are the same.
+ if (State.getCurrentLBR().IsArtificial) {
+ NumExtCallBranch++;
+    // A return was matched; pop the external frame.
+ if (State.getParentFrame()->isExternalFrame()) {
+ State.popFrame();
+ } else {
+      // An artificial return is missing; this happens when the sample is hit
+      // in the middle of external code. In this case, the leading branch is a
+      // call to external code, so we just keep unwinding using a context-less
+      // stack.
+ if (State.getParentFrame() != State.getDummyRootPtr())
+ NumMissingExternalFrame++;
+ State.clearCallStack();
+ State.pushFrame(Source);
+ State.InstPtr.update(Source);
+ return;
+ }
+ }
+
+ auto *ParentFrame = State.getParentFrame();
+  // The 2nd frame after the leaf could be missing if the stack sample is
+  // taken when the IP is within the prolog/epilog, as the frame chain isn't
+  // set up yet. Fill in the missing frame in that case.
+  // TODO: Currently we just assume all the addresses that can't match the
+  // 2nd frame are in the prolog/epilog. In the future, we will switch to a
+  // prolog/epilog tracker (DWARF CFI) for the precise check.
+ if (ParentFrame == State.getDummyRootPtr() ||
+ ParentFrame->Address != Source) {
+ State.switchToFrame(Source);
+ if (ParentFrame != State.getDummyRootPtr()) {
+ if (State.getCurrentLBR().IsArtificial)
+ NumMismatchedExtCallBranch++;
+ else
+ NumMismatchedProEpiBranch++;
+ }
+ } else {
+ State.popFrame();
+ }
+ State.InstPtr.update(Source);
+}
+
+void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
+ InstructionPointer &IP = State.InstPtr;
+ uint64_t Target = State.getCurrentLBRTarget();
+ uint64_t End = IP.Address;
+ if (Binary->usePseudoProbes()) {
+    // We don't need the top frame probe since it should be extracted
+    // from the range.
+ // The outcome of the virtual unwinding with pseudo probes is a
+ // map from a context key to the address range being unwound.
+ // This means basically linear unwinding is not needed for pseudo
+ // probes. The range will be simply recorded here and will be
+ // converted to a list of pseudo probes to report in ProfileGenerator.
+ State.getParentFrame()->recordRangeCount(Target, End, Repeat);
+ } else {
+ // Unwind linear execution part.
+ // Split and record the range by different inline context. For example:
+ // [0x01] ... main:1 # Target
+ // [0x02] ... main:2
+ // [0x03] ... main:3 @ foo:1
+ // [0x04] ... main:3 @ foo:2
+ // [0x05] ... main:3 @ foo:3
+ // [0x06] ... main:4
+ // [0x07] ... main:5 # End
+ // It will be recorded:
+ // [main:*] : [0x06, 0x07], [0x01, 0x02]
+ // [main:3 @ foo:*] : [0x03, 0x05]
+ while (IP.Address > Target) {
+ uint64_t PrevIP = IP.Address;
+ IP.backward();
+ // Break into segments for implicit call/return due to inlining
+ bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
+ if (!SameInlinee) {
+ State.switchToFrame(PrevIP);
+ State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
+ End = IP.Address;
+ }
+ }
+ assert(IP.Address == Target && "The last one must be the target address.");
+ // Record the remaining range, [0x01, 0x02] in the example
+ State.switchToFrame(IP.Address);
+ State.CurrentLeafFrame->recordRangeCount(IP.Address, End, Repeat);
+ }
+}
+
+void VirtualUnwinder::unwindReturn(UnwindState &State) {
+ // Add extra frame as we unwind through the return
+ const LBREntry &LBR = State.getCurrentLBR();
+ uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
+ State.switchToFrame(CallAddr);
+  // Push an external frame for the case of returning to an external
+  // address (callback); later, if an artificial call is matched, it will be
+  // popped. This is to 1) avoid the context being interrupted by the
+  // callback: the contexts before and after the callback should be the same;
+  // 2) truncate the call stack of functions called by the callback, which is
+  // done while recording the context on the trie. For example:
+  // main (call)--> foo (call)--> callback (call)--> bar (return)--> callback
+  // (return)--> foo (return)--> main
+  // The context for bar should not include main and foo.
+  // For the code of foo, the contexts before and after the callback should
+  // both be [foo, main].
+ if (LBR.IsArtificial)
+ State.pushFrame(ExternalAddr);
+ State.pushFrame(LBR.Source);
+ State.InstPtr.update(LBR.Source);
+}
+
+void VirtualUnwinder::unwindBranch(UnwindState &State) {
+ // TODO: Tolerate tail call for now, as we may see tail call from libraries.
+ // This is only for intra function branches, excluding tail calls.
+ uint64_t Source = State.getCurrentLBRSource();
+ State.switchToFrame(Source);
+ State.InstPtr.update(Source);
+}
+
+std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
+ std::shared_ptr<StringBasedCtxKey> KeyStr =
+ std::make_shared<StringBasedCtxKey>();
+ KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined);
+ if (KeyStr->Context.empty())
+ return nullptr;
+ return KeyStr;
+}
+
+std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
+ std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
+ std::make_shared<ProbeBasedCtxKey>();
+ for (auto CallProbe : Stack) {
+ ProbeBasedKey->Probes.emplace_back(CallProbe);
+ }
+ CSProfileGenerator::compressRecursionContext<const MCDecodedPseudoProbe *>(
+ ProbeBasedKey->Probes);
+ CSProfileGenerator::trimContext<const MCDecodedPseudoProbe *>(
+ ProbeBasedKey->Probes);
+ return ProbeBasedKey;
+}
+
+template <typename T>
+void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
+ T &Stack) {
+ if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
+ return;
+
+ std::shared_ptr<ContextKey> Key = Stack.getContextKey();
+ if (Key == nullptr)
+ return;
+ auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
+ SampleCounter &SCounter = Ret.first->second;
+ for (auto &Item : Cur->RangeSamples) {
+ uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
+ uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
+ SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
+ }
+
+ for (auto &Item : Cur->BranchSamples) {
+ uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
+ uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
+ SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
+ }
+}
+
+template <typename T>
+void VirtualUnwinder::collectSamplesFromFrameTrie(
+ UnwindState::ProfiledFrame *Cur, T &Stack) {
+ if (!Cur->isDummyRoot()) {
+ // Truncate the context for external frame since this isn't a real call
+ // context the compiler will see.
+ if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) {
+ // Process truncated context
+ // Start a new traversal ignoring its bottom context
+ T EmptyStack(Binary);
+ collectSamplesFromFrame(Cur, EmptyStack);
+ for (const auto &Item : Cur->Children) {
+ collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
+ }
+
+      // Keep note of untracked call sites and deduplicate them
+      // for warning later.
+ if (!Cur->isLeafFrame())
+ UntrackedCallsites.insert(Cur->Address);
+
+ return;
+ }
+ }
+
+ collectSamplesFromFrame(Cur, Stack);
+ // Process children frame
+ for (const auto &Item : Cur->Children) {
+ collectSamplesFromFrameTrie(Item.second.get(), Stack);
+ }
+ // Recover the call stack
+ Stack.popFrame();
+}
+
+void VirtualUnwinder::collectSamplesFromFrameTrie(
+ UnwindState::ProfiledFrame *Cur) {
+ if (Binary->usePseudoProbes()) {
+ ProbeStack Stack(Binary);
+ collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
+ } else {
+ FrameStack Stack(Binary);
+ collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
+ }
+}
+
+void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
+ UnwindState &State, uint64_t Repeat) {
+ if (Branch.IsArtificial || Branch.Target == ExternalAddr)
+ return;
+
+ if (Binary->usePseudoProbes()) {
+    // Same as recordRangeCount: we don't need the top frame probe since we
+    // will extract it from the branch's source address.
+ State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
+ Repeat);
+ } else {
+ State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
+ Repeat);
+ }
+}
+
+bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
+ // Capture initial state as starting point for unwinding.
+ UnwindState State(Sample, Binary);
+
+  // Sanity check - make sure the leaf of the LBR aligns with the leaf of the
+  // stack sample. Stack samples can sometimes be unreliable, so filter out
+  // bogus ones.
+ if (!State.validateInitialState())
+ return false;
+
+  // Now process the LBR samples in parallel with the stack sample.
+  // Note that we do not reverse the LBR entry order so we can
+  // unwind the sample stack as we walk through the LBR entries.
+ while (State.hasNextLBR()) {
+ State.checkStateConsistency();
+
+ // Do not attempt linear unwind for the leaf range as it's incomplete.
+ if (!State.IsLastLBR()) {
+ // Unwind implicit calls/returns from inlining, along the linear path,
+ // break into smaller sub section each with its own calling context.
+ unwindLinear(State, Repeat);
+ }
+
+ // Save the LBR branch before it gets unwound.
+ const LBREntry &Branch = State.getCurrentLBR();
+
+ if (isCallState(State)) {
+      // Unwind calls - we know we encountered a call if the LBR overlaps with
+      // the transition between the leaf and the 2nd frame. Note that for
+      // calls that were not in the original stack sample, we should have
+      // added the extra frame when processing the return paired with this
+      // call.
+ unwindCall(State);
+ } else if (isReturnState(State)) {
+ // Unwind returns - check whether the IP is indeed at a return instruction
+ unwindReturn(State);
+ } else {
+      // Unwind branches
+      // For regular intra-function branches, we only need to record the
+      // branch with context. For an artificial branch crossing function
+      // boundaries, we get an issue with returning to external code. Take
+      // these two LBR entries for example: [foo:8(RETURN), ext:1]
+      // [ext:3(CALL), bar:1]. After the perf reader, we only get
+      // [foo:8(RETURN), bar:1], and the unwinder would be confused as if foo
+      // returned to bar. Here we detect and treat this case as a BRANCH
+      // instead of a RETURN, which only updates the source address.
+ unwindBranch(State);
+ }
+ State.advanceLBR();
+ // Record `branch` with calling context after unwinding.
+ recordBranchCount(Branch, State, Repeat);
+ }
+ // As samples are aggregated on trie, record them into counter map
+ collectSamplesFromFrameTrie(State.getDummyRootPtr());
+
+ return true;
+}
+
+std::unique_ptr<PerfReaderBase>
+PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput) {
+ std::unique_ptr<PerfReaderBase> PerfReader;
+
+ if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
+ PerfReader.reset(
+ new UnsymbolizedProfileReader(Binary, PerfInput.InputFile));
+ return PerfReader;
+ }
+
+ // For perf data input, we need to convert them into perf script first.
+ if (PerfInput.Format == PerfFormat::PerfData)
+ PerfInput = PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput);
+
+ assert((PerfInput.Format == PerfFormat::PerfScript) &&
+ "Should be a perfscript!");
+
+ PerfInput.Content =
+ PerfScriptReader::checkPerfScriptType(PerfInput.InputFile);
+ if (PerfInput.Content == PerfContent::LBRStack) {
+ PerfReader.reset(new HybridPerfReader(Binary, PerfInput.InputFile));
+ } else if (PerfInput.Content == PerfContent::LBR) {
+ PerfReader.reset(new LBRPerfReader(Binary, PerfInput.InputFile));
+ } else {
+ exitWithError("Unsupported perfscript!");
+ }
+
+ return PerfReader;
+}
+
+PerfInputFile PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
+ PerfInputFile &File) {
+ StringRef PerfData = File.InputFile;
+ // Run perf script to retrieve PIDs matching binary we're interested in.
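+  // This is roughly equivalent to running:
+  //   perf script --show-mmap-events -F comm,pid -i <perf data>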
+ auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
+ if (!PerfExecutable) {
+ exitWithError("Perf not found.");
+ }
+ std::string PerfPath = *PerfExecutable;
+ std::string PerfTraceFile = PerfData.str() + ".script.tmp";
+ StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
+ "-F", "comm,pid", "-i",
+ PerfData};
+ Optional<StringRef> Redirects[] = {llvm::None, // Stdin
+ StringRef(PerfTraceFile), // Stdout
+ StringRef(PerfTraceFile)}; // Stderr
+ sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, llvm::None, Redirects);
+
+ // Collect the PIDs
+ TraceStream TraceIt(PerfTraceFile);
+ std::string PIDs;
+ std::unordered_set<uint32_t> PIDSet;
+ while (!TraceIt.isAtEoF()) {
+ MMapEvent MMap;
+ if (isMMap2Event(TraceIt.getCurrentLine()) &&
+ extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
+ auto It = PIDSet.emplace(MMap.PID);
+ if (It.second) {
+ if (!PIDs.empty()) {
+ PIDs.append(",");
+ }
+ PIDs.append(utostr(MMap.PID));
+ }
+ }
+ TraceIt.advance();
+ }
+
+ if (PIDs.empty()) {
+ exitWithError("No relevant mmap event is found in perf data.");
+ }
+
+ // Run perf script again to retrieve events for PIDs collected above
+ StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events",
+ "-F", "ip,brstack", "--pid",
+ PIDs, "-i", PerfData};
+ sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, llvm::None, Redirects);
+
+ return {PerfTraceFile, PerfFormat::PerfScript, PerfContent::UnknownContent};
+}
+
+void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
+ // Drop the event which doesn't belong to user-provided binary
+ StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath);
+ if (Binary->getName() != BinaryName)
+ return;
+
+ // Drop the event if its image is loaded at the same address
+ if (Event.Address == Binary->getBaseAddress()) {
+ Binary->setIsLoadedByMMap(true);
+ return;
+ }
+
+ if (Event.Offset == Binary->getTextSegmentOffset()) {
+ // A binary image could be unloaded and then reloaded at different
+ // place, so update binary load address.
+ // Only update for the first executable segment and assume all other
+ // segments are loaded at consecutive memory addresses, which is the case on
+ // X64.
+ Binary->setBaseAddress(Event.Address);
+ Binary->setIsLoadedByMMap(true);
+ } else {
+ // Verify segments are loaded consecutively.
+ const auto &Offsets = Binary->getTextSegmentOffsets();
+ auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset);
+ if (It != Offsets.end() && *It == Event.Offset) {
+ // The event is for loading a separate executable segment.
+ auto I = std::distance(Offsets.begin(), It);
+ const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses();
+ if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() !=
+ Event.Address - Binary->getBaseAddress())
+ exitWithError("Executable segments not loaded consecutively");
+ } else {
+ if (It == Offsets.begin())
+ exitWithError("File offset not found");
+ else {
+ // Find the segment the event falls in. A large segment could be loaded
+ // via multiple mmap calls with consecutive memory addresses.
+ --It;
+ assert(*It < Event.Offset);
+ if (Event.Offset - *It != Event.Address - Binary->getBaseAddress())
+ exitWithError("Segment not loaded by consecutive mmaps");
+ }
+ }
+ }
+}
+
+static std::string getContextKeyStr(ContextKey *K,
+ const ProfiledBinary *Binary) {
+ if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
+ return SampleContext::getContextString(CtxKey->Context);
+ } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
+ SampleContextFrameVector ContextStack;
+ for (const auto *Probe : CtxKey->Probes) {
+ Binary->getInlineContextForProbe(Probe, ContextStack, true);
+ }
+ // Probe context key at this point does not have leaf probe, so do not
+ // include the leaf inline location.
+ return SampleContext::getContextString(ContextStack, true);
+ } else {
+ llvm_unreachable("unexpected key type");
+ }
+}
+
+void HybridPerfReader::unwindSamples() {
+ if (Binary->useFSDiscriminator())
+ exitWithError("FS discriminator is not supported in CS profile.");
+ VirtualUnwinder Unwinder(&SampleCounters, Binary);
+ for (const auto &Item : AggregatedSamples) {
+ const PerfSample *Sample = Item.first.getPtr();
+ Unwinder.unwind(Sample, Item.second);
+ }
+
+ // Warn about untracked frames due to missing probes.
+ if (ShowDetailedWarning) {
+ for (auto Address : Unwinder.getUntrackedCallsites())
+ WithColor::warning() << "Profile context truncated due to missing probe "
+ << "for call instruction at "
+ << format("0x%" PRIx64, Address) << "\n";
+ }
+
+ emitWarningSummary(Unwinder.getUntrackedCallsites().size(),
+ SampleCounters.size(),
+ "of profiled contexts are truncated due to missing probe "
+ "for call instruction.");
+
+ emitWarningSummary(
+ Unwinder.NumMismatchedExtCallBranch, Unwinder.NumTotalBranches,
+ "of branches'source is a call instruction but doesn't match call frame "
+ "stack, likely due to unwinding error of external frame.");
+
+ emitWarningSummary(
+ Unwinder.NumMismatchedProEpiBranch, Unwinder.NumTotalBranches,
+ "of branches'source is a call instruction but doesn't match call frame "
+ "stack, likely due to frame in prolog/epilog.");
+
+ emitWarningSummary(Unwinder.NumMissingExternalFrame,
+ Unwinder.NumExtCallBranch,
+ "of artificial call branches but doesn't have an external "
+ "frame to match.");
+}
+
+bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
+ SmallVectorImpl<LBREntry> &LBRStack) {
+ // The raw format of LBR stack is like:
+ // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
+ // ... 0x4005c8/0x4005dc/P/-/-/0
+  // It's in FIFO order and separated by whitespace.
+ SmallVector<StringRef, 32> Records;
+ TraceIt.getCurrentLine().split(Records, " ", -1, false);
+ auto WarnInvalidLBR = [](TraceStream &TraceIt) {
+ WithColor::warning() << "Invalid address in LBR record at line "
+ << TraceIt.getLineNumber() << ": "
+ << TraceIt.getCurrentLine() << "\n";
+ };
+
+ // Skip the leading instruction pointer.
+ size_t Index = 0;
+ uint64_t LeadingAddr;
+ if (!Records.empty() && !Records[0].contains('/')) {
+ if (Records[0].getAsInteger(16, LeadingAddr)) {
+ WarnInvalidLBR(TraceIt);
+ TraceIt.advance();
+ return false;
+ }
+ Index = 1;
+ }
+ // Now extract LBR samples - note that we do not reverse the
+ // LBR entry order so we can unwind the sample stack as we walk
+ // through LBR entries.
+ uint64_t PrevTrDst = 0;
+
+ while (Index < Records.size()) {
+ auto &Token = Records[Index++];
+ if (Token.size() == 0)
+ continue;
+
+ SmallVector<StringRef, 8> Addresses;
+ Token.split(Addresses, "/");
+ uint64_t Src;
+ uint64_t Dst;
+
+ // Stop at broken LBR records.
+ if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) ||
+ Addresses[1].substr(2).getAsInteger(16, Dst)) {
+ WarnInvalidLBR(TraceIt);
+ break;
+ }
+
+ bool SrcIsInternal = Binary->addressIsCode(Src);
+ bool DstIsInternal = Binary->addressIsCode(Dst);
+ bool IsExternal = !SrcIsInternal && !DstIsInternal;
+ bool IsIncoming = !SrcIsInternal && DstIsInternal;
+ bool IsOutgoing = SrcIsInternal && !DstIsInternal;
+ bool IsArtificial = false;
+
+ // Ignore branches outside the current binary.
+ if (IsExternal) {
+ if (!PrevTrDst && !LBRStack.empty()) {
+ WithColor::warning()
+ << "Invalid transfer to external code in LBR record at line "
+ << TraceIt.getLineNumber() << ": " << TraceIt.getCurrentLine()
+ << "\n";
+ }
+      // Do not ignore the entire sample; the remaining LBRs can still be
+      // unwound using a context-less stack.
+ continue;
+ }
+
+ if (IsOutgoing) {
+ if (!PrevTrDst) {
+ // This is a leading outgoing LBR, we should keep processing the LBRs.
+ if (LBRStack.empty()) {
+ NumLeadingOutgoingLBR++;
+          // Record this LBR since the current source and the next LBR's
+          // target still form a valid range.
+ LBRStack.emplace_back(LBREntry(Src, ExternalAddr, false));
+ continue;
+ }
+        // This is a mid-stream unpaired outgoing jump, likely due to an
+        // interrupt or an incomplete LBR trace. Ignore current and subsequent
+        // entries since they are likely in different contexts.
+ break;
+ }
+
+      // For a transition to external code, group the source with the next
+      // available transition target.
+ Dst = PrevTrDst;
+ PrevTrDst = 0;
+ IsArtificial = true;
+ } else {
+ if (PrevTrDst) {
+ // If we have seen an incoming transition from external code to internal
+ // code, but not a following outgoing transition, the incoming
+        // transition is likely due to an interrupt, which is usually
+        // unpaired.
+ // Ignore current and subsequent entries since they are likely in
+ // different contexts.
+ break;
+ }
+
+ if (IsIncoming) {
+ // For transition from external code (such as dynamic libraries) to
+ // the current binary, keep track of the branch target which will be
+ // grouped with the Source of the last transition from the current
+ // binary.
+ PrevTrDst = Dst;
+ continue;
+ }
+ }
+
+ // TODO: filter out buggy duplicate branches on Skylake
+
+ LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial));
+ }
+ TraceIt.advance();
+ return !LBRStack.empty();
+}
+
+bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
+ SmallVectorImpl<uint64_t> &CallStack) {
+ // The raw format of call stack is like:
+ // 4005dc # leaf frame
+ // 400634
+ // 400684 # root frame
+ // It's in bottom-up order with each frame in one line.
+
+ // Extract stack frames from sample
+ while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
+ StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
+ uint64_t FrameAddr = 0;
+ if (FrameStr.getAsInteger(16, FrameAddr)) {
+      // We might have parsed a non-perf-sample line such as an empty line
+      // or a comment; skip it.
+ TraceIt.advance();
+ return false;
+ }
+ TraceIt.advance();
+    // Currently, intermixed frames from different binaries are not supported.
+ if (!Binary->addressIsCode(FrameAddr)) {
+ if (CallStack.empty())
+ NumLeafExternalFrame++;
+      // Push a special value (ExternalAddr) for the external frames so that
+      // the unwinder can still work on this with artificial call/return
+      // branches. After unwinding, the context will be truncated for external
+      // frames. Also deduplicate consecutive external addresses.
+ if (CallStack.empty() || CallStack.back() != ExternalAddr)
+ CallStack.emplace_back(ExternalAddr);
+ continue;
+ }
+
+ // We need to translate return address to call address for non-leaf frames.
+ if (!CallStack.empty()) {
+ auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
+ if (!CallAddr) {
+        // Stop at an invalid return address caused by bad unwinding. This can
+        // happen with frame-pointer-based unwinding when callee functions do
+        // not have the frame pointer chain set up.
+ InvalidReturnAddresses.insert(FrameAddr);
+ break;
+ }
+ FrameAddr = CallAddr;
+ }
+
+ CallStack.emplace_back(FrameAddr);
+ }
+
+ // Strip out the bottom external addr.
+ if (CallStack.size() > 1 && CallStack.back() == ExternalAddr)
+ CallStack.pop_back();
+
+  // Skip other unrelated lines and find the next valid LBR line.
+  // Note that even for an empty call stack, we should skip the address at the
+  // bottom, otherwise the following pass may generate a truncated call stack.
+ while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
+ TraceIt.advance();
+ }
+  // Filter out broken stack samples. We may not have complete frame info
+  // if the sample ends up in the prolog/epilog; the result is a dangling
+  // context not connected to the entry point. This should be relatively rare,
+  // thus not much impact on overall profile quality. However, we do want to
+  // filter them out to reduce the number of different calling contexts. One
+  // instance of such a case: when a sample lands in the prolog/epilog, stack
+  // walking may be broken in an unexpected way such that higher frames are
+  // missing.
+ return !CallStack.empty() &&
+ !Binary->addressInPrologEpilog(CallStack.front());
+}
+
+void PerfScriptReader::warnIfMissingMMap() {
+ if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
+ WithColor::warning() << "No relevant mmap event is matched for "
+ << Binary->getName()
+ << ", will use preferred address ("
+ << format("0x%" PRIx64,
+ Binary->getPreferredBaseAddress())
+ << ") as the base loading address!\n";
+ // Avoid redundant warning, only warn at the first unmatched sample.
+ Binary->setMissingMMapWarned(true);
+ }
+}
+
+void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
+  // The raw hybrid sample starts with a call stack in FILO order, immediately
+  // followed by an LBR sample,
+ // e.g.
+ // 4005dc # call stack leaf
+ // 400634
+ // 400684 # call stack root
+ // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
+ // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
+ //
+ std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
+
+  // Parse the call stack and populate PerfSample.CallStack.
+ if (!extractCallstack(TraceIt, Sample->CallStack)) {
+    // Skip the next LBR line that matches the current call stack.
+ if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))
+ TraceIt.advance();
+ return;
+ }
+
+ warnIfMissingMMap();
+
+ if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
+    // Parse the LBR stack and populate PerfSample.LBRStack.
+ if (extractLBRStack(TraceIt, Sample->LBRStack)) {
+ if (IgnoreStackSamples) {
+ Sample->CallStack.clear();
+ } else {
+        // Canonicalize the stack leaf to avoid a 'random' IP from the leaf
+        // frame skewing LBR ranges.
+ Sample->CallStack.front() = Sample->LBRStack[0].Target;
+ }
+ // Record samples by aggregation
+ AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
+ }
+ } else {
+    // The LBR sample is encoded in a single line after the stack sample.
+    exitWithError("Hybrid perf sample is corrupted, no LBR sample line");
+ }
+}
+
+void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) {
+ std::error_code EC;
+ raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
+ if (EC)
+ exitWithError(EC, Filename);
+ writeUnsymbolizedProfile(OS);
+}
+
+// Use ordered map to make the output deterministic
+using OrderedCounterForPrint = std::map<std::string, SampleCounter *>;
+
+void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) {
+ OrderedCounterForPrint OrderedCounters;
+ for (auto &CI : SampleCounters) {
+ OrderedCounters[getContextKeyStr(CI.first.getPtr(), Binary)] = &CI.second;
+ }
+
+ auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator,
+ uint32_t Indent) {
+ OS.indent(Indent);
+ OS << Counter.size() << "\n";
+ for (auto &I : Counter) {
+ uint64_t Start = I.first.first;
+ uint64_t End = I.first.second;
+
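+    // Output addressing (descriptive): with --use-offset (the default), raw
+    // offsets are written as-is; adding --use-first-loadable-segment-as-base
+    // rebases them onto the first loadable segment; with --use-offset
+    // disabled, virtual addresses are written.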
+ if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) {
+ Start = Binary->offsetToVirtualAddr(Start);
+ End = Binary->offsetToVirtualAddr(End);
+ }
+
+ if (UseOffset && UseLoadableSegmentAsBase) {
+ Start -= Binary->getFirstLoadableAddress();
+ End -= Binary->getFirstLoadableAddress();
+ }
+
+ OS.indent(Indent);
+ OS << Twine::utohexstr(Start) << Separator << Twine::utohexstr(End) << ":"
+ << I.second << "\n";
+ }
+ };
+
+ for (auto &CI : OrderedCounters) {
+ uint32_t Indent = 0;
+ if (ProfileIsCSFlat) {
+ // Context string key
+ OS << "[" << CI.first << "]\n";
+ Indent = 2;
+ }
+
+ SampleCounter &Counter = *CI.second;
+ SCounterPrinter(Counter.RangeCounter, "-", Indent);
+ SCounterPrinter(Counter.BranchCounter, "->", Indent);
+ }
+}
+
+// Format of input:
+// number of entries in RangeCounter
+// from_1-to_1:count_1
+// from_2-to_2:count_2
+// ......
+// from_n-to_n:count_n
+// number of entries in BranchCounter
+// src_1->dst_1:count_1
+// src_2->dst_2:count_2
+// ......
+// src_n->dst_n:count_n
+void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
+ SampleCounter &SCounters) {
+ auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) {
+ std::string Msg = TraceIt.isAtEoF()
+ ? "Invalid raw profile!"
+ : "Invalid raw profile at line " +
+ Twine(TraceIt.getLineNumber()).str() + ": " +
+ TraceIt.getCurrentLine().str();
+ exitWithError(Msg);
+ };
+ auto ReadNumber = [&](uint64_t &Num) {
+ if (TraceIt.isAtEoF())
+ exitWithErrorForTraceLine(TraceIt);
+ if (TraceIt.getCurrentLine().ltrim().getAsInteger(10, Num))
+ exitWithErrorForTraceLine(TraceIt);
+ TraceIt.advance();
+ };
+
+ auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) {
+ uint64_t Num = 0;
+ ReadNumber(Num);
+ while (Num--) {
+ if (TraceIt.isAtEoF())
+ exitWithErrorForTraceLine(TraceIt);
+ StringRef Line = TraceIt.getCurrentLine().ltrim();
+
+ uint64_t Count = 0;
+ auto LineSplit = Line.split(":");
+ if (LineSplit.second.empty() || LineSplit.second.getAsInteger(10, Count))
+ exitWithErrorForTraceLine(TraceIt);
+
+ uint64_t Source = 0;
+ uint64_t Target = 0;
+ auto Range = LineSplit.first.split(Separator);
+ if (Range.second.empty() || Range.first.getAsInteger(16, Source) ||
+ Range.second.getAsInteger(16, Target))
+ exitWithErrorForTraceLine(TraceIt);
+
+ if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) {
+ uint64_t BaseAddr = 0;
+ if (UseOffset && UseLoadableSegmentAsBase)
+ BaseAddr = Binary->getFirstLoadableAddress();
+
+ Source = Binary->virtualAddrToOffset(Source + BaseAddr);
+ Target = Binary->virtualAddrToOffset(Target + BaseAddr);
+ }
+
+ Counter[{Source, Target}] += Count;
+ TraceIt.advance();
+ }
+ };
+
+ ReadCounter(SCounters.RangeCounter, "-");
+ ReadCounter(SCounters.BranchCounter, "->");
+}
+
+void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
+ TraceStream TraceIt(FileName);
+ while (!TraceIt.isAtEoF()) {
+ std::shared_ptr<StringBasedCtxKey> Key =
+ std::make_shared<StringBasedCtxKey>();
+ StringRef Line = TraceIt.getCurrentLine();
+ // Read context stack for CS profile.
+ if (Line.startswith("[")) {
+ ProfileIsCSFlat = true;
+ auto I = ContextStrSet.insert(Line.str());
+ SampleContext::createCtxVectorFromStr(*I.first, Key->Context);
+ TraceIt.advance();
+ }
+ auto Ret =
+ SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
+ readSampleCounters(TraceIt, Ret.first->second);
+ }
+}
+
+void UnsymbolizedProfileReader::parsePerfTraces() {
+ readUnsymbolizedProfile(PerfTraceFile);
+}
+
+void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
+ uint64_t Repeat) {
+ SampleCounter &Counter = SampleCounters.begin()->second;
+  uint64_t EndOffset = 0;
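+  // Walking entries newest-to-oldest: for a newer entry (s1 -> t1) followed
+  // by an older entry (s2 -> t2), the linearly executed range [t2, s1] gets a
+  // range count (illustrative notation).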
+ for (const LBREntry &LBR : Sample->LBRStack) {
+    assert(LBR.Source != ExternalAddr &&
+           "A branch's source should not be an external address; it should "
+           "have been converted to an artificial branch.");
+ uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source);
+ uint64_t TargetOffset = LBR.Target == static_cast<uint64_t>(ExternalAddr)
+ ? static_cast<uint64_t>(ExternalAddr)
+ : Binary->virtualAddrToOffset(LBR.Target);
+
+ if (!LBR.IsArtificial && TargetOffset != ExternalAddr) {
+ Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
+ }
+
+    // If this is not the first LBR, update the range count between the target
+    // of the current LBR and the source of the next LBR.
+    uint64_t StartOffset = TargetOffset;
+    if (EndOffset != 0)
+      Counter.recordRangeCount(StartOffset, EndOffset, Repeat);
+    EndOffset = SourceOffset;
+ }
+}
+
+void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
+ std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
+  // Parse the LBR stack and populate PerfSample.LBRStack.
+ if (extractLBRStack(TraceIt, Sample->LBRStack)) {
+ warnIfMissingMMap();
+ // Record LBR only samples by aggregation
+ AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
+ }
+}
+
+void PerfScriptReader::generateUnsymbolizedProfile() {
+  // There is no context for an LBR-only sample, so initialize one entry
+  // with a fake "empty" context key.
+ assert(SampleCounters.empty() &&
+ "Sample counter map should be empty before raw profile generation");
+ std::shared_ptr<StringBasedCtxKey> Key =
+ std::make_shared<StringBasedCtxKey>();
+ SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
+ for (const auto &Item : AggregatedSamples) {
+ const PerfSample *Sample = Item.first.getPtr();
+ computeCounterFromLBR(Sample, Item.second);
+ }
+}
+
+uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
+  // The aggregated count is optional; if the line doesn't parse as a count,
+  // do not skip it and just return 1.
+ uint64_t Count = 1;
+ if (!TraceIt.getCurrentLine().getAsInteger(10, Count))
+ TraceIt.advance();
+ return Count;
+}
+
+void PerfScriptReader::parseSample(TraceStream &TraceIt) {
+ NumTotalSample++;
+ uint64_t Count = parseAggregatedCount(TraceIt);
+ assert(Count >= 1 && "Aggregated count should be >= 1!");
+ parseSample(TraceIt, Count);
+}
+
+bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
+ StringRef Line,
+ MMapEvent &MMap) {
+ // Parse a line like:
+ // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
+ // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
+ constexpr static const char *const Pattern =
+ "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
+ "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
+ "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
+ // Field 0 - whole line
+ // Field 1 - PID
+ // Field 2 - base address
+ // Field 3 - mmapped size
+ // Field 4 - page offset
+ // Field 5 - binary path
+ enum EventIndex {
+ WHOLE_LINE = 0,
+ PID = 1,
+ MMAPPED_ADDRESS = 2,
+ MMAPPED_SIZE = 3,
+ PAGE_OFFSET = 4,
+ BINARY_PATH = 5
+ };
+
+ Regex RegMmap2(Pattern);
+ SmallVector<StringRef, 6> Fields;
+ bool R = RegMmap2.match(Line, &Fields);
+ if (!R) {
+ std::string ErrorMsg = "Cannot parse mmap event: " + Line.str() + " \n";
+ exitWithError(ErrorMsg);
+ }
+ Fields[PID].getAsInteger(10, MMap.PID);
+ Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
+ Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
+ Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
+ MMap.BinaryPath = Fields[BINARY_PATH];
+ if (ShowMmapEvents) {
+ outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
+ << format("0x%" PRIx64 ":", MMap.Address) << " \n";
+ }
+
+ StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath);
+ return Binary->getName() == BinaryName;
+}
+
+void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) {
+ MMapEvent MMap;
+ if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
+ updateBinaryAddress(MMap);
+ TraceIt.advance();
+}
+
+void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
+ if (isMMap2Event(TraceIt.getCurrentLine()))
+ parseMMap2Event(TraceIt);
+ else
+ parseSample(TraceIt);
+}
+
+void PerfScriptReader::parseAndAggregateTrace() {
+ // Trace line iterator
+ TraceStream TraceIt(PerfTraceFile);
+ while (!TraceIt.isAtEoF())
+ parseEventOrSample(TraceIt);
+}
+
+// An LBR sample looks like:
+// 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ...
+// A heuristic for fast detection: check whether the field following the
+// leading instruction pointer starts with "0x" and contains '/'.
+bool PerfScriptReader::isLBRSample(StringRef Line) {
+ // Skip the leading instruction pointer
+ SmallVector<StringRef, 32> Records;
+ Line.trim().split(Records, " ", 2, false);
+ if (Records.size() < 2)
+ return false;
+ if (Records[1].startswith("0x") && Records[1].contains('/'))
+ return true;
+ return false;
+}
+
+bool PerfScriptReader::isMMap2Event(StringRef Line) {
+ // Short cut to avoid a string find where possible.
+ if (Line.empty() || Line.size() < 50)
+ return false;
+
+ if (std::isdigit(Line[0]))
+ return false;
+
+ // PERF_RECORD_MMAP2 does not appear at the beginning of the line
+ // for ` perf script --show-mmap-events -i ...`
+ return Line.contains("PERF_RECORD_MMAP2");
+}
+
+// A raw hybrid sample looks like:
+// 4005dc # call stack leaf
+// 400634
+// 400684 # call stack root
+// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
+// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
+// Determine whether the perf script contains hybrid samples (call stack +
+// LBRs) by checking whether there is a non-empty call stack immediately
+// followed by an LBR sample.
+PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) {
+ TraceStream TraceIt(FileName);
+ uint64_t FrameAddr = 0;
+ while (!TraceIt.isAtEoF()) {
+ // Skip the aggregated count
+ if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr))
+ TraceIt.advance();
+
+ // Detect sample with call stack
+ int32_t Count = 0;
+ while (!TraceIt.isAtEoF() &&
+ !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
+ Count++;
+ TraceIt.advance();
+ }
+ if (!TraceIt.isAtEoF()) {
+ if (isLBRSample(TraceIt.getCurrentLine())) {
+ if (Count > 0)
+ return PerfContent::LBRStack;
+ else
+ return PerfContent::LBR;
+ }
+ TraceIt.advance();
+ }
+ }
+
+ exitWithError("Invalid perf script input!");
+ return PerfContent::UnknownContent;
+}
+
+void HybridPerfReader::generateUnsymbolizedProfile() {
+ ProfileIsCSFlat = !IgnoreStackSamples;
+ if (ProfileIsCSFlat)
+ unwindSamples();
+ else
+ PerfScriptReader::generateUnsymbolizedProfile();
+}
+
+void PerfScriptReader::warnTruncatedStack() {
+ if (ShowDetailedWarning) {
+ for (auto Address : InvalidReturnAddresses) {
+ WithColor::warning()
+ << "Truncated stack sample due to invalid return address at "
+ << format("0x%" PRIx64, Address)
+ << ", likely caused by frame pointer omission\n";
+ }
+ }
+ emitWarningSummary(
+ InvalidReturnAddresses.size(), AggregatedSamples.size(),
+ "of truncated stack samples due to invalid return address, "
+ "likely caused by frame pointer omission.");
+}
+
+void PerfScriptReader::warnInvalidRange() {
+ std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t,
+ pair_hash<uint64_t, uint64_t>>
+ Ranges;
+
+ for (const auto &Item : AggregatedSamples) {
+ const PerfSample *Sample = Item.first.getPtr();
+ uint64_t Count = Item.second;
+ uint64_t EndOffset = 0;
+ for (const LBREntry &LBR : Sample->LBRStack) {
+ uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source);
+ uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target);
+ if (EndOffset != 0)
+ Ranges[{StartOffset, EndOffset}] += Count;
+ EndOffset = SourceOffset;
+ }
+ }
+
+ if (Ranges.empty()) {
+ WithColor::warning() << "No samples in perf script!\n";
+ return;
+ }
+
+ auto WarnInvalidRange =
+ [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) {
+ if (!ShowDetailedWarning)
+ return;
+ WithColor::warning()
+ << "["
+ << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset))
+ << ","
+ << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset))
+ << "]: " << Msg << "\n";
+ };
+
+ const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
+ "likely due to profile and binary mismatch.";
+ const char *DanglingRangeMsg = "Range does not belong to any functions, "
+ "likely from PLT, .init or .fini section.";
+ const char *RangeCrossFuncMsg =
+ "Fall through range should not cross function boundaries, likely due to "
+ "profile and binary mismatch.";
+
+ uint64_t InstNotBoundary = 0;
+ uint64_t UnmatchedRange = 0;
+ uint64_t RangeCrossFunc = 0;
+
+ for (auto &I : Ranges) {
+ uint64_t StartOffset = I.first.first;
+ uint64_t EndOffset = I.first.second;
+
+ if (!Binary->offsetIsCode(StartOffset) ||
+ !Binary->offsetIsTransfer(EndOffset)) {
+ InstNotBoundary++;
+ WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
+ }
+
+ auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
+ if (!FRange) {
+ UnmatchedRange++;
+ WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
+ continue;
+ }
+
+ if (EndOffset >= FRange->EndOffset) {
+ RangeCrossFunc++;
+ WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
+ }
+ }
+
+ uint64_t TotalRangeNum = Ranges.size();
+ emitWarningSummary(InstNotBoundary, TotalRangeNum,
+ "of profiled ranges are not on instruction boundary.");
+ emitWarningSummary(UnmatchedRange, TotalRangeNum,
+ "of profiled ranges do not belong to any functions.");
+ emitWarningSummary(RangeCrossFunc, TotalRangeNum,
+ "of profiled ranges do cross function boundaries.");
+}
+
+void PerfScriptReader::parsePerfTraces() {
+ // Parse perf traces and do aggregation.
+ parseAndAggregateTrace();
+
+ emitWarningSummary(NumLeafExternalFrame, NumTotalSample,
+ "of samples have leaf external frame in call stack.");
+ emitWarningSummary(NumLeadingOutgoingLBR, NumTotalSample,
+ "of samples have leading external LBR.");
+
+ // Emit accumulated warnings, then generate the unsymbolized profile.
+ warnTruncatedStack();
+ warnInvalidRange();
+ generateUnsymbolizedProfile();
+ AggregatedSamples.clear();
+
+ if (SkipSymbolization)
+ writeUnsymbolizedProfile(OutputFilename);
+}
+
+} // end namespace sampleprof
+} // end namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.h b/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.h
new file mode 100644
index 0000000000..9d84ad34bb
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/PerfReader.h
@@ -0,0 +1,728 @@
+//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
+#include "ErrorHandling.h"
+#include "ProfiledBinary.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Regex.h"
+#include <cstdint>
+#include <fstream>
+#include <list>
+#include <map>
+#include <vector>
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+namespace sampleprof {
+
+// Stream based trace line iterator
+class TraceStream {
+ std::string CurrentLine;
+ std::ifstream Fin;
+ bool IsAtEoF = false;
+ uint64_t LineNumber = 0;
+
+public:
+ TraceStream(StringRef Filename) : Fin(Filename.str()) {
+ if (!Fin.good())
+ exitWithError("Error read input perf script file", Filename);
+ advance();
+ }
+
+ StringRef getCurrentLine() {
+ assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
+ return CurrentLine;
+ }
+
+ uint64_t getLineNumber() { return LineNumber; }
+
+ bool isAtEoF() { return IsAtEoF; }
+
+ // Read the next line
+ void advance() {
+ if (!std::getline(Fin, CurrentLine)) {
+ IsAtEoF = true;
+ return;
+ }
+ LineNumber++;
+ }
+};
+
+// The type of input format.
+enum PerfFormat {
+ UnknownFormat = 0,
+ PerfData = 1, // Raw Linux perf.data.
+ PerfScript = 2, // Perf script created by the `perf script` command.
+ UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
+};
+
+// The type of perfscript content.
+enum PerfContent {
+ UnknownContent = 0,
+ LBR = 1, // Only LBR sample.
+ LBRStack = 2, // Hybrid sample including call stack and LBR stack.
+};
+
+struct PerfInputFile {
+ std::string InputFile;
+ PerfFormat Format = PerfFormat::UnknownFormat;
+ PerfContent Content = PerfContent::UnknownContent;
+};
+
+// The parsed LBR sample entry.
+struct LBREntry {
+ uint64_t Source = 0;
+ uint64_t Target = 0;
+ // An artificial branch stands for a series of consecutive branches starting
+ // from the current binary with a transition through external code and
+ // eventually landing back in the current binary.
+ bool IsArtificial = false;
+ LBREntry(uint64_t S, uint64_t T, bool I)
+ : Source(S), Target(T), IsArtificial(I) {}
+
+#ifndef NDEBUG
+ void print() const {
+ dbgs() << "from " << format("%#010x", Source) << " to "
+ << format("%#010x", Target);
+ if (IsArtificial)
+ dbgs() << " Artificial";
+ }
+#endif
+};
+
+#ifndef NDEBUG
+static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
+ for (size_t I = 0; I < LBRStack.size(); I++) {
+ dbgs() << "[" << I << "] ";
+ LBRStack[I].print();
+ dbgs() << "\n";
+ }
+}
+
+static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
+ for (size_t I = 0; I < CallStack.size(); I++) {
+ dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
+ }
+}
+#endif
+
+// Hash interface for generic data of type T.
+// Data should implement a \fn getHashCode and a \fn isEqual. Currently
+// getHashCode is non-virtual to avoid the overhead of a vtable call, i.e. we
+// explicitly calculate the hash of the derived class and assign it to the
+// base class's HashCode. This also provides the flexibility to calculate the
+// hash code incrementally (like a rolling hash) during frame stack unwinding,
+// since unwinding only changes the leaf of the frame stack. \fn isEqual is a
+// virtual function, which has some perf overhead. In the future, if we
+// redesign a better hash function, we can skip this or switch to a
+// non-virtual function (e.g. just skip the comparison if the probability of
+// hash conflicts is low).
+template <class T> class Hashable {
+public:
+ std::shared_ptr<T> Data;
+ Hashable(const std::shared_ptr<T> &D) : Data(D) {}
+
+ // Hash code generation
+ struct Hash {
+ uint64_t operator()(const Hashable<T> &Key) const {
+ // Don't make it virtual for getHashCode
+ uint64_t Hash = Key.Data->getHashCode();
+ assert(Hash && "Should generate HashCode for it!");
+ return Hash;
+ }
+ };
+
+ // Hash equal
+ struct Equal {
+ bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
+ // Precisely compare the data, vtable will have overhead.
+ return LHS.Data->isEqual(RHS.Data.get());
+ }
+ };
+
+ T *getPtr() const { return Data.get(); }
+};
+
+struct PerfSample {
+ // LBR stack recorded in FIFO order.
+ SmallVector<LBREntry, 16> LBRStack;
+ // Call stack recorded in FILO (leaf-to-root) order; it's used for
+ // CS-profile generation.
+ SmallVector<uint64_t, 16> CallStack;
+
+ virtual ~PerfSample() = default;
+ uint64_t getHashCode() const {
+ // Use simple DJB2 hash
+ auto HashCombine = [](uint64_t H, uint64_t V) {
+ return ((H << 5) + H) + V;
+ };
+ uint64_t Hash = 5381;
+ for (const auto &Value : CallStack) {
+ Hash = HashCombine(Hash, Value);
+ }
+ for (const auto &Entry : LBRStack) {
+ Hash = HashCombine(Hash, Entry.Source);
+ Hash = HashCombine(Hash, Entry.Target);
+ }
+ return Hash;
+ }
+
+ bool isEqual(const PerfSample *Other) const {
+ const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
+ const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
+
+ if (CallStack.size() != OtherCallStack.size() ||
+ LBRStack.size() != OtherLBRStack.size())
+ return false;
+
+ if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
+ return false;
+
+ for (size_t I = 0; I < OtherLBRStack.size(); I++) {
+ if (LBRStack[I].Source != OtherLBRStack[I].Source ||
+ LBRStack[I].Target != OtherLBRStack[I].Target)
+ return false;
+ }
+ return true;
+ }
+
+#ifndef NDEBUG
+ void print() const {
+ dbgs() << "LBR stack\n";
+ printLBRStack(LBRStack);
+ dbgs() << "Call stack\n";
+ printCallStack(CallStack);
+ }
+#endif
+};
+// After parsing a sample, we record the samples by aggregating them into
+// this counter. The key stores the sample data and the value is the number
+// of times the sample repeats.
+using AggregatedCounter =
+ std::unordered_map<Hashable<PerfSample>, uint64_t,
+ Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
+
+using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
+
+// The state for the unwinder. It doesn't hold the data but only keeps
+// pointers/indices into the data. While unwinding, the call stack is changed
+// dynamically and will be recorded as the context of the sample.
+struct UnwindState {
+ // Profiled binary that current frame address belongs to
+ const ProfiledBinary *Binary;
+ // Call stack trie node
+ struct ProfiledFrame {
+ const uint64_t Address = DummyRoot;
+ ProfiledFrame *Parent;
+ SampleVector RangeSamples;
+ SampleVector BranchSamples;
+ std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
+
+ ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
+ : Address(Addr), Parent(P) {}
+ ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
+ assert(Address && "Address can't be zero!");
+ auto Ret = Children.emplace(
+ Address, std::make_unique<ProfiledFrame>(Address, this));
+ return Ret.first->second.get();
+ }
+ void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
+ RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
+ }
+ void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
+ BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
+ }
+ bool isDummyRoot() { return Address == DummyRoot; }
+ bool isExternalFrame() { return Address == ExternalAddr; }
+ bool isLeafFrame() { return Children.empty(); }
+ };
+
+ ProfiledFrame DummyTrieRoot;
+ ProfiledFrame *CurrentLeafFrame;
+ // Used to fall through the LBR stack
+ uint32_t LBRIndex = 0;
+ // Reference to PerfSample.LBRStack
+ const SmallVector<LBREntry, 16> &LBRStack;
+ // Used to iterate the address range
+ InstructionPointer InstPtr;
+ UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
+ : Binary(Binary), LBRStack(Sample->LBRStack),
+ InstPtr(Binary, Sample->CallStack.front()) {
+ initFrameTrie(Sample->CallStack);
+ }
+
+ bool validateInitialState() {
+ uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
+ uint64_t LeafAddr = CurrentLeafFrame->Address;
+ assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
+ "External leading LBR should match the leaf frame.");
+
+ // When we take a stack sample, ideally the sampling distance between the
+ // leaf IP of stack and the last LBR target shouldn't be very large.
+ // Use a heuristic size (0x100) to filter out broken records.
+ if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
+ WithColor::warning() << "Bogus trace: stack tip = "
+ << format("%#010x", LeafAddr)
+ << ", LBR tip = " << format("%#010x\n", LBRLeaf);
+ return false;
+ }
+ return true;
+ }
+
+ void checkStateConsistency() {
+ assert(InstPtr.Address == CurrentLeafFrame->Address &&
+ "IP should align with context leaf");
+ }
+
+ bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
+ uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
+ uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
+ const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
+ bool IsLastLBR() const { return LBRIndex == 0; }
+ size_t getLBRStackSize() const { return LBRStack.size(); }
+ void advanceLBR() { LBRIndex++; }
+ ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
+
+ void pushFrame(uint64_t Address) {
+ CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
+ }
+
+ void switchToFrame(uint64_t Address) {
+ if (CurrentLeafFrame->Address == Address)
+ return;
+ CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
+ }
+
+ void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
+
+ void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
+
+ void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
+ ProfiledFrame *Cur = &DummyTrieRoot;
+ for (auto Address : reverse(CallStack)) {
+ Cur = Cur->getOrCreateChildFrame(Address);
+ }
+ CurrentLeafFrame = Cur;
+ }
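+
+ // For illustration: a sample whose call stack (leaf to root) is
+ // {0x4005dc, 0x400634, 0x400684} yields the trie path
+ // DummyRoot -> 0x400684 -> 0x400634 -> 0x4005dc, with CurrentLeafFrame
+ // left pointing at the 0x4005dc node (addresses borrowed from the hybrid
+ // sample example below).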
+
+ ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
+};
+
+// Base class for sample counter key with context
+struct ContextKey {
+ uint64_t HashCode = 0;
+ virtual ~ContextKey() = default;
+ uint64_t getHashCode() {
+ if (HashCode == 0)
+ genHashCode();
+ return HashCode;
+ }
+ virtual void genHashCode() = 0;
+ virtual bool isEqual(const ContextKey *K) const {
+ return HashCode == K->HashCode;
+ };
+
+ // Utilities for LLVM-style RTTI
+ enum ContextKind { CK_StringBased, CK_ProbeBased };
+ const ContextKind Kind;
+ ContextKind getKind() const { return Kind; }
+ ContextKey(ContextKind K) : Kind(K){};
+};
+
+// String based context id
+struct StringBasedCtxKey : public ContextKey {
+ SampleContextFrameVector Context;
+
+ bool WasLeafInlined;
+ StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
+ static bool classof(const ContextKey *K) {
+ return K->getKind() == CK_StringBased;
+ }
+
+ bool isEqual(const ContextKey *K) const override {
+ const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
+ return Context == Other->Context;
+ }
+
+ void genHashCode() override {
+ HashCode = hash_value(SampleContextFrames(Context));
+ }
+};
+
+// Probe-based context key as the intermediate key of context.
+// A string-based context key would introduce redundant string handling,
+// since the callee context is inferred from the context string, which needs
+// to be split by '@' to get the last location frame. So we can just use
+// probes instead and generate the string at the end.
+struct ProbeBasedCtxKey : public ContextKey {
+ SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
+
+ ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
+ static bool classof(const ContextKey *K) {
+ return K->getKind() == CK_ProbeBased;
+ }
+
+ bool isEqual(const ContextKey *K) const override {
+ const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
+ assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
+ return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
+ O->Probes.end());
+ }
+
+ void genHashCode() override {
+ for (const auto *P : Probes) {
+ HashCode = hash_combine(HashCode, P);
+ }
+ if (HashCode == 0) {
+ // Avoid zero value of HashCode when it's an empty list
+ HashCode = 1;
+ }
+ }
+};
+
+// The counter of branch samples for one function indexed by the branch,
+// which is represented as the source and target offset pair.
+using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
+// The counter of range samples for one function indexed by the range,
+// which is represented as the start and end offset pair.
+using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
+// Wrapper for sample counters including range counter and branch counter
+struct SampleCounter {
+ RangeSample RangeCounter;
+ BranchSample BranchCounter;
+
+ void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
+ assert(Start <= End && "Invalid instruction range");
+ RangeCounter[{Start, End}] += Repeat;
+ }
+ void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
+ BranchCounter[{Source, Target}] += Repeat;
+ }
+};
+
+// Sample counter with context to support context-sensitive profile
+using ContextSampleCounterMap =
+ std::unordered_map<Hashable<ContextKey>, SampleCounter,
+ Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
+
+struct FrameStack {
+ SmallVector<uint64_t, 16> Stack;
+ ProfiledBinary *Binary;
+ FrameStack(ProfiledBinary *B) : Binary(B) {}
+ bool pushFrame(UnwindState::ProfiledFrame *Cur) {
+ assert(!Cur->isExternalFrame() &&
+ "External frame's not expected for context stack.");
+ Stack.push_back(Cur->Address);
+ return true;
+ }
+
+ void popFrame() {
+ if (!Stack.empty())
+ Stack.pop_back();
+ }
+ std::shared_ptr<StringBasedCtxKey> getContextKey();
+};
+
+struct ProbeStack {
+ SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
+ ProfiledBinary *Binary;
+ ProbeStack(ProfiledBinary *B) : Binary(B) {}
+ bool pushFrame(UnwindState::ProfiledFrame *Cur) {
+ assert(!Cur->isExternalFrame() &&
+ "External frame's not expected for context stack.");
+ const MCDecodedPseudoProbe *CallProbe =
+ Binary->getCallProbeForAddr(Cur->Address);
+ // We may not find a probe for a merged or external callsite.
+ // Callsite merging may cause the loss of original probe IDs.
+ // Cutting off the context from here since the inliner will
+ // not know how to consume a context with unknown callsites.
+ if (!CallProbe)
+ return false;
+ Stack.push_back(CallProbe);
+ return true;
+ }
+
+ void popFrame() {
+ if (!Stack.empty())
+ Stack.pop_back();
+ }
+ // Use a pseudo probe based context key to get the sample counter.
+ // A context stands for a call path from 'main' to an uninlined
+ // callee with all inline frames recovered on that path. The probes
+ // belonging to that call path are the probes either originating from
+ // the callee or from any functions inlined into the callee. Since
+ // pseudo probes are organized in a trie style after being decoded,
+ // the tree path from the trie root (which is the uninlined
+ // callee) to the probe node forms an inline context.
+ // Here we use a list of probes (pointers) as the context key to speed up
+ // aggregation, and the final context string will be generated in
+ // ProfileGenerator.
+ std::shared_ptr<ProbeBasedCtxKey> getContextKey();
+};
+
+/*
+As in a hybrid sample we have a group of LBRs and the most recently sampled
+call stack, we can walk through those LBRs to infer more call stacks which
+will be used as contexts for the profile. VirtualUnwinder is the class that
+does the call stack unwinding based on LBR state. Two types of unwinding are
+processed here: 1) LBR unwinding and 2) linear range unwinding.
+Specifically, for each LBR entry (which can be classified as a call, a
+return, or a regular branch), LBR unwinding replays the operation by pushing,
+popping or switching the leaf frame of the call stack. Since the initial
+call stack is the most recently sampled one, the replay runs in
+anti-execution order, i.e. for the regular case, pop the call stack when the
+LBR is a call and push a frame onto the call stack when the LBR is a return.
+After each LBR is processed, the unwinder also needs to align with the next
+LBR by going through the instructions from the previous LBR's target to the
+current LBR's source, which is the linear unwinding. As instructions in a
+linear range can come from different functions due to inlining, linear
+unwinding does the range splitting and records counters per range with the
+same inline context. During this unwinding process we record each call stack
+as a context id and each LBR/linear range as a sample counter for further CS
+profile generation.
+*/
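+//
+// For illustration (hypothetical addresses): suppose the sampled call stack
+// is {0x4005dc in foo, 0x400684 in main} and the current LBR is a return
+// whose target is 0x4005dc. Replaying it backwards, the unwinder switches
+// the leaf frame to the call site matching the return target and pushes a
+// frame for the return's source, so the inferred context grows to include
+// the callee that had already returned by the time the sample was taken.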
+class VirtualUnwinder {
+public:
+ VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
+ : CtxCounterMap(Counter), Binary(B) {}
+ bool unwind(const PerfSample *Sample, uint64_t Repeat);
+ std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
+
+ uint64_t NumTotalBranches = 0;
+ uint64_t NumExtCallBranch = 0;
+ uint64_t NumMissingExternalFrame = 0;
+ uint64_t NumMismatchedProEpiBranch = 0;
+ uint64_t NumMismatchedExtCallBranch = 0;
+
+private:
+ bool isCallState(UnwindState &State) const {
+ // The tail call frame is always missing in the stack sample; we will use a
+ // specific tail call tracker to infer it.
+ return Binary->addressIsCall(State.getCurrentLBRSource());
+ }
+
+ bool isReturnState(UnwindState &State) const {
+ // Simply check addressIsReturn, as ret is always reliable, both for
+ // regular call and tail call.
+ if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
+ return false;
+
+ // In a callback case, a return from internal code, say A, to external
+ // runtime can happen. The external runtime can then call back to
+ // another internal routine, say B. Making an artificial branch that
+ // looks like a return from A to B can confuse the unwinder to treat
+ // the instruction before B as the call instruction. We detect this case
+ // here: if the return target is not the instruction right after a call,
+ // then we just do not treat it as a return.
+ uint64_t CallAddr =
+ Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget());
+ return (CallAddr != 0);
+ }
+
+ void unwindCall(UnwindState &State);
+ void unwindLinear(UnwindState &State, uint64_t Repeat);
+ void unwindReturn(UnwindState &State);
+ void unwindBranch(UnwindState &State);
+
+ template <typename T>
+ void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
+ // Collect samples on each trie node by DFS traversal.
+ template <typename T>
+ void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
+ void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
+
+ void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
+ uint64_t Repeat);
+ void recordBranchCount(const LBREntry &Branch, UnwindState &State,
+ uint64_t Repeat);
+
+ ContextSampleCounterMap *CtxCounterMap;
+ // Profiled binary that current frame address belongs to
+ ProfiledBinary *Binary;
+ // Keep track of all untracked callsites
+ std::set<uint64_t> UntrackedCallsites;
+};
+
+// Read perf trace to parse the events and samples.
+class PerfReaderBase {
+public:
+ PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
+ : Binary(B), PerfTraceFile(PerfTrace) {
+ // Initialize the base address to preferred address.
+ Binary->setBaseAddress(Binary->getPreferredBaseAddress());
+ };
+ virtual ~PerfReaderBase() = default;
+ static std::unique_ptr<PerfReaderBase> create(ProfiledBinary *Binary,
+ PerfInputFile &PerfInput);
+
+ // Entry of the reader to parse multiple perf traces
+ virtual void parsePerfTraces() = 0;
+ const ContextSampleCounterMap &getSampleCounters() const {
+ return SampleCounters;
+ }
+ bool profileIsCSFlat() { return ProfileIsCSFlat; }
+
+protected:
+ ProfiledBinary *Binary = nullptr;
+ StringRef PerfTraceFile;
+
+ ContextSampleCounterMap SampleCounters;
+ bool ProfileIsCSFlat = false;
+
+ uint64_t NumTotalSample = 0;
+ uint64_t NumLeafExternalFrame = 0;
+ uint64_t NumLeadingOutgoingLBR = 0;
+};
+
+// Read perf script to parse the events and samples.
+class PerfScriptReader : public PerfReaderBase {
+public:
+ PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace)
+ : PerfReaderBase(B, PerfTrace){};
+
+ // Entry of the reader to parse multiple perf traces
+ virtual void parsePerfTraces() override;
+ // Generate perf script from perf data
+ static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
+ PerfInputFile &File);
+ // Extract perf script type by peeking at the input.
+ static PerfContent checkPerfScriptType(StringRef FileName);
+
+protected:
+ // The parsed MMap event
+ struct MMapEvent {
+ uint64_t PID = 0;
+ uint64_t Address = 0;
+ uint64_t Size = 0;
+ uint64_t Offset = 0;
+ StringRef BinaryPath;
+ };
+
+ // Check whether a given line is an LBR sample.
+ static bool isLBRSample(StringRef Line);
+ // Check whether a given line is an MMAP event.
+ static bool isMMap2Event(StringRef Line);
+ // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
+ // mapping between the binary name and its memory layout.
+ static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
+ MMapEvent &MMap);
+ // Update base address based on mmap events
+ void updateBinaryAddress(const MMapEvent &Event);
+ // Parse mmap event and update binary address
+ void parseMMap2Event(TraceStream &TraceIt);
+ // Parse perf events/samples and do aggregation
+ void parseAndAggregateTrace();
+ // Parse either an MMAP event or a perf sample
+ void parseEventOrSample(TraceStream &TraceIt);
+ // Warn if the relevant mmap event is missing.
+ void warnIfMissingMMap();
+ // Emit accumulated warnings.
+ void warnTruncatedStack();
+ // Warn if range is invalid.
+ void warnInvalidRange();
+ // Extract call stack from the perf trace lines
+ bool extractCallstack(TraceStream &TraceIt,
+ SmallVectorImpl<uint64_t> &CallStack);
+ // Extract LBR stack from one perf trace line
+ bool extractLBRStack(TraceStream &TraceIt,
+ SmallVectorImpl<LBREntry> &LBRStack);
+ uint64_t parseAggregatedCount(TraceStream &TraceIt);
+ // Parse one sample from multiple perf lines; override this for different
+ // sample types.
+ void parseSample(TraceStream &TraceIt);
+ // An aggregated count is given to indicate how many times the sample is
+ // repeated.
+ virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
+ void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
+ // Post-process the profile after trace aggregation. We do a simple range
+ // overlap computation for AutoFDO, or unwinding for CSSPGO (hybrid samples).
+ virtual void generateUnsymbolizedProfile();
+ void writeUnsymbolizedProfile(StringRef Filename);
+ void writeUnsymbolizedProfile(raw_fd_ostream &OS);
+
+ // Samples with their repeat counts, generated by the perf reader.
+ AggregatedCounter AggregatedSamples;
+ // Keep track of all invalid return addresses
+ std::set<uint64_t> InvalidReturnAddresses;
+};
+
+/*
+ The reader for LBR-only perf scripts.
+ A typical LBR sample looks like:
+ 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
+ ... 0x4005c8/0x4005dc/P/-/-/0
+*/
+class LBRPerfReader : public PerfScriptReader {
+public:
+ LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
+ : PerfScriptReader(Binary, PerfTrace){};
+ // Parse the LBR-only sample.
+ virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
+};
+
+/*
+ Hybrid perf script includes a group of hybrid samples (LBRs + call stack),
+ which are used to generate a CS profile. An example of a hybrid sample:
+ 4005dc # call stack leaf
+ 400634
+ 400684 # call stack root
+ 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
+ ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
+*/
+class HybridPerfReader : public PerfScriptReader {
+public:
+ HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
+ : PerfScriptReader(Binary, PerfTrace){};
+ // Parse the hybrid sample including the call stack and LBR lines.
+ void parseSample(TraceStream &TraceIt, uint64_t Count) override;
+ void generateUnsymbolizedProfile() override;
+
+private:
+ // Unwind the hybrid samples after aggregation.
+ void unwindSamples();
+};
+
+/*
+ Format of unsymbolized profile:
+
+ [frame1 @ frame2 @ ...] # If it's a CS profile
+ number of entries in RangeCounter
+ from_1-to_1:count_1
+ from_2-to_2:count_2
+ ......
+ from_n-to_n:count_n
+ number of entries in BranchCounter
+ src_1->dst_1:count_1
+ src_2->dst_2:count_2
+ ......
+ src_n->dst_n:count_n
+ [frame1 @ frame2 @ ...] # Next context
+ ......
+
+Note that non-CS profile doesn't have the empty `[]` context.
+*/
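+//
+// A made-up CS instance of the format above (frame names, offsets, and
+// counts are purely illustrative):
+//   [main:1 @ foo]
+//   2
+//   10-20:100
+//   30-40:50
+//   1
+//   25->60:30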
+class UnsymbolizedProfileReader : public PerfReaderBase {
+public:
+ UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
+ : PerfReaderBase(Binary, PerfTrace){};
+ void parsePerfTraces() override;
+
+private:
+ void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
+ void readUnsymbolizedProfile(StringRef Filename);
+
+ std::unordered_set<std::string> ContextStrSet;
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.cpp b/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.cpp
new file mode 100644
index 0000000000..1248e37dc5
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.cpp
@@ -0,0 +1,979 @@
+//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfileGenerator.h"
+#include "ErrorHandling.h"
+#include "ProfiledBinary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include <float.h>
+#include <unordered_set>
+
+cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+ cl::Required,
+ cl::desc("Output profile file"));
+static cl::alias OutputA("o", cl::desc("Alias for --output"),
+ cl::aliasopt(OutputFilename));
+
+static cl::opt<SampleProfileFormat> OutputFormat(
+ "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
+ cl::values(
+ clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
+ clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
+ clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
+ clEnumValN(SPF_Text, "text", "Text encoding"),
+ clEnumValN(SPF_GCC, "gcc",
+ "GCC encoding (only meaningful for -sample)")));
+
+cl::opt<bool> UseMD5(
+ "use-md5", cl::init(false), cl::Hidden,
+ cl::desc("Use md5 to represent function names in the output profile (only "
+ "meaningful for -extbinary)"));
+
+static cl::opt<bool> PopulateProfileSymbolList(
+ "populate-profile-symbol-list", cl::init(false), cl::Hidden,
+ cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
+
+static cl::opt<bool> FillZeroForAllFuncs(
+ "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
+ cl::desc("Attribute all functions' range with zero count "
+ "even it's not hit by any samples."));
+
+static cl::opt<int32_t, true> RecursionCompression(
+ "compress-recursion",
+ cl::desc("Compressing recursion by deduplicating adjacent frame "
+ "sequences up to the specified size. -1 means no size limit."),
+ cl::Hidden,
+ cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
+
+static cl::opt<bool>
+ TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
+ cl::desc("If the total count of the profile is smaller "
+ "than threshold, it will be trimmed."));
+
+static cl::opt<bool> CSProfMergeColdContext(
+ "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
+ cl::desc("If the total count of context profile is smaller than "
+ "the threshold, it will be merged into context-less base "
+ "profile."));
+
+static cl::opt<uint32_t> CSProfMaxColdContextDepth(
+ "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
+ cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
+ "context-less base profile"));
+
+static cl::opt<int, true> CSProfMaxContextDepth(
+ "csprof-max-context-depth", cl::ZeroOrMore,
+ cl::desc("Keep the last K contexts while merging profile. -1 means no "
+ "depth limit."),
+ cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
+
+static cl::opt<double> HotFunctionDensityThreshold(
+ "hot-function-density-threshold", llvm::cl::init(1000),
+ llvm::cl::desc(
+ "specify density threshold for hot functions (default: 1000)"),
+ llvm::cl::Optional);
+static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
+ llvm::cl::desc("show profile density details"),
+ llvm::cl::Optional);
+
+static cl::opt<bool> UpdateTotalSamples(
+ "update-total-samples", llvm::cl::init(false),
+ llvm::cl::desc(
+ "Update total samples by accumulating all its body samples."),
+ llvm::cl::Optional);
+
+extern cl::opt<int> ProfileSummaryCutoffHot;
+
+static cl::opt<bool> GenCSNestedProfile(
+ "gen-cs-nested-profile", cl::Hidden, cl::init(false),
+ cl::desc("Generate nested function profiles for CSSPGO"));
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+namespace sampleprof {
+
+// Initialize the MaxCompressionSize to -1 which means no size limit
+int32_t CSProfileGenerator::MaxCompressionSize = -1;
+
+int CSProfileGenerator::MaxContextDepth = -1;
+
+bool ProfileGeneratorBase::UseFSDiscriminator = false;
+
+std::unique_ptr<ProfileGeneratorBase>
+ProfileGeneratorBase::create(ProfiledBinary *Binary,
+ const ContextSampleCounterMap &SampleCounters,
+ bool ProfileIsCSFlat) {
+ std::unique_ptr<ProfileGeneratorBase> Generator;
+ if (ProfileIsCSFlat) {
+ if (Binary->useFSDiscriminator())
+ exitWithError("FS discriminator is not supported in CS profile.");
+ Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
+ } else {
+ Generator.reset(new ProfileGenerator(Binary, SampleCounters));
+ }
+ ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
+ FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
+
+ return Generator;
+}
+
+void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
+ SampleProfileMap &ProfileMap) {
+ // Populate profile symbol list if extended binary format is used.
+ ProfileSymbolList SymbolList;
+
+ if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
+ Binary->populateSymbolListFromDWARF(SymbolList);
+ Writer->setProfileSymbolList(&SymbolList);
+ }
+
+ if (std::error_code EC = Writer->write(ProfileMap))
+ exitWithError(std::move(EC));
+}
+
+void ProfileGeneratorBase::write() {
+ auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
+ if (std::error_code EC = WriterOrErr.getError())
+ exitWithError(EC, OutputFilename);
+
+ if (UseMD5) {
+ if (OutputFormat != SPF_Ext_Binary)
+ WithColor::warning() << "-use-md5 is ignored. Specify "
+ "--format=extbinary to enable it\n";
+ else
+ WriterOrErr.get()->setUseMD5();
+ }
+
+ write(std::move(WriterOrErr.get()), ProfileMap);
+}
+
+void ProfileGeneratorBase::showDensitySuggestion(double Density) {
+ if (Density == 0.0)
+ WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
+ "set too low. Please check your command.\n";
+ else if (Density < HotFunctionDensityThreshold)
+ WithColor::warning()
+ << "AutoFDO is estimated to optimize better with "
+ << format("%.1f", HotFunctionDensityThreshold / Density)
+ << "x more samples. Please consider increasing sampling rate or "
+ "profiling for longer duration to get more samples.\n";
+
+ if (ShowDensity)
+ outs() << "Minimum profile density for hot functions with top "
+ << format("%.2f",
+ static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
+ 10000)
+ << "% total samples: " << format("%.1f", Density) << "\n";
+}
+
+double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
+ uint64_t HotCntThreshold) {
+ double Density = DBL_MAX;
+ std::vector<const FunctionSamples *> HotFuncs;
+ for (auto &I : Profiles) {
+ auto &FuncSamples = I.second;
+ if (FuncSamples.getTotalSamples() < HotCntThreshold)
+ continue;
+ HotFuncs.emplace_back(&FuncSamples);
+ }
+
+ for (auto *FuncSamples : HotFuncs) {
+ auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
+ if (!Func)
+ continue;
+ uint64_t FuncSize = Func->getFuncSize();
+ if (FuncSize == 0)
+ continue;
+ Density =
+ std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
+ FuncSize);
+ }
+
+ return Density == DBL_MAX ? 0.0 : Density;
+}
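+
+// For illustration (made-up numbers): if the only two functions that clear
+// HotCntThreshold have 50,000 samples over 1,000 bytes (density 50.0) and
+// 90,000 samples over 300 bytes (density 300.0), calculateDensity() returns
+// the minimum, 50.0.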
+
+void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
+ const RangeSample &Ranges) {
+
+ /*
+ Regions may overlap with each other. Using the boundary info, find all
+ disjoint ranges and their sample counts. A BoundaryPoint contains the
+ counts of the samples that begin/end at this point.
+
+ |<--100-->| Sample1
+ |<------200------>| Sample2
+ A B C
+
+ In the example above,
+ Sample1 begins at A, ends at B, its value is 100.
+ Sample2 begins at A, ends at C, its value is 200.
+ For A, BeginCount is the sum of sample begins at A, which is 300 and no
+ samples ends at A, so EndCount is 0.
+ Then boundary points A, B, and C with begin/end counts are:
+ A: (300, 0)
+ B: (0, 100)
+ C: (0, 200)
+ */
+ struct BoundaryPoint {
+ // Sum of sample counts beginning at this point
+ uint64_t BeginCount = UINT64_MAX;
+ // Sum of sample counts ending at this point
+ uint64_t EndCount = UINT64_MAX;
+ // Is the begin point of a zero range.
+ bool IsZeroRangeBegin = false;
+ // Is the end point of a zero range.
+ bool IsZeroRangeEnd = false;
+
+ void addBeginCount(uint64_t Count) {
+ if (BeginCount == UINT64_MAX)
+ BeginCount = 0;
+ BeginCount += Count;
+ }
+
+ void addEndCount(uint64_t Count) {
+ if (EndCount == UINT64_MAX)
+ EndCount = 0;
+ EndCount += Count;
+ }
+ };
+
+ /*
+ For the above example, with the boundary points, the following logic finds
+ two disjoint regions:
+
+ [A,B]: 300
+ [B+1,C]: 200
+
+ If there is a boundary point that both begin and end, the point itself
+ becomes a separate disjoint region. For example, if we have original
+ ranges of
+
+ |<--- 100 --->|
+ |<--- 200 --->|
+ A B C
+
+ there are three boundary points with their begin/end counts of
+
+ A: (100, 0)
+ B: (200, 100)
+ C: (0, 200)
+
+ the disjoint ranges would be
+
+ [A, B-1]: 100
+ [B, B]: 300
+ [B+1, C]: 200.
+
+ Example for zero value range:
+
+ |<--- 100 --->|
+ |<--- 200 --->|
+ |<--------------- 0 ----------------->|
+ A B C D E F
+
+ [A, B-1] : 0
+ [B, C] : 100
+ [C+1, D-1]: 0
+ [D, E] : 200
+ [E+1, F] : 0
+ */
+ std::map<uint64_t, BoundaryPoint> Boundaries;
+
+ for (const auto &Item : Ranges) {
+ assert(Item.first.first <= Item.first.second &&
+ "Invalid instruction range");
+ auto &BeginPoint = Boundaries[Item.first.first];
+ auto &EndPoint = Boundaries[Item.first.second];
+ uint64_t Count = Item.second;
+
+ BeginPoint.addBeginCount(Count);
+ EndPoint.addEndCount(Count);
+ if (Count == 0) {
+ BeginPoint.IsZeroRangeBegin = true;
+ EndPoint.IsZeroRangeEnd = true;
+ }
+ }
+
+ // Use UINT64_MAX to indicate there is no existing range between BeginAddress
+ // and the next valid address
+ uint64_t BeginAddress = UINT64_MAX;
+ int ZeroRangeDepth = 0;
+ uint64_t Count = 0;
+ for (const auto &Item : Boundaries) {
+ uint64_t Address = Item.first;
+ const BoundaryPoint &Point = Item.second;
+ if (Point.BeginCount != UINT64_MAX) {
+ if (BeginAddress != UINT64_MAX)
+ DisjointRanges[{BeginAddress, Address - 1}] = Count;
+ Count += Point.BeginCount;
+ BeginAddress = Address;
+ ZeroRangeDepth += Point.IsZeroRangeBegin;
+ }
+ if (Point.EndCount != UINT64_MAX) {
+ assert((BeginAddress != UINT64_MAX) &&
+ "First boundary point cannot be 'end' point");
+ DisjointRanges[{BeginAddress, Address}] = Count;
+ assert(Count >= Point.EndCount && "Mismatched live ranges");
+ Count -= Point.EndCount;
+ BeginAddress = Address + 1;
+ ZeroRangeDepth -= Point.IsZeroRangeEnd;
+ // If the remaining count is zero and it's no longer in a zero range, this
+ // means we have consumed all the preceding ranges, thus mark BeginAddress
+ // as UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
+ // [<---- 10 ---->]
+ // [<---- 20 ---->]
+ // A B C D
+ // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
+ // have the [B+1, C-1] zero range.
+ if (Count == 0 && ZeroRangeDepth == 0)
+ BeginAddress = UINT64_MAX;
+ }
+ }
+}
+
+void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
+ FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
+ uint64_t Count) {
+ // Use the maximum count of samples with the same line location.
+ uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
+
+ // Use the duplication factor to compensate for loop unrolling/vectorization.
+ // Note that this is only needed when we're taking MAX of the counts at
+ // the location instead of SUM.
+ Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
+
+ ErrorOr<uint64_t> R =
+ FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
+
+ uint64_t PreviousCount = R ? R.get() : 0;
+ if (PreviousCount <= Count) {
+ FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
+ Count - PreviousCount);
+ }
+}
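+
+// For illustration (made-up numbers): if a source line was duplicated 4x by
+// unrolling and one of its copies is hit 10 times, the raw count 10 is
+// scaled by the duplication factor to 40 before taking the MAX against any
+// previously recorded count at that line/discriminator, approximating the
+// source-level execution count.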
+
+void ProfileGeneratorBase::updateTotalSamples() {
+ if (!UpdateTotalSamples)
+ return;
+
+ for (auto &Item : ProfileMap) {
+ FunctionSamples &FunctionProfile = Item.second;
+ FunctionProfile.updateTotalSamples();
+ }
+}
+
+FunctionSamples &
+ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
+ SampleContext Context(FuncName);
+ auto Ret = ProfileMap.emplace(Context, FunctionSamples());
+ if (Ret.second) {
+ FunctionSamples &FProfile = Ret.first->second;
+ FProfile.setContext(Context);
+ }
+ return Ret.first->second;
+}
+
+void ProfileGenerator::generateProfile() {
+ if (Binary->usePseudoProbes()) {
+ // TODO: Support probe based profile generation
+ exitWithError("Probe based profile generation not supported for AutoFDO, "
+ "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
+ } else {
+ generateLineNumBasedProfile();
+ }
+ postProcessProfiles();
+}
+
+void ProfileGenerator::postProcessProfiles() {
+ computeSummaryAndThreshold();
+ trimColdProfiles(ProfileMap, ColdCountThreshold);
+ calculateAndShowDensity(ProfileMap);
+}
+
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+ uint64_t ColdCntThreshold) {
+ if (!TrimColdProfile)
+ return;
+
+ // Move cold profiles into a tmp container.
+ std::vector<SampleContext> ColdProfiles;
+ for (const auto &I : ProfileMap) {
+ if (I.second.getTotalSamples() < ColdCntThreshold)
+ ColdProfiles.emplace_back(I.first);
+ }
+
+ // Remove the cold profile from ProfileMap.
+ for (const auto &I : ColdProfiles)
+ ProfileMap.erase(I);
+}
+
+void ProfileGenerator::generateLineNumBasedProfile() {
+ assert(SampleCounters.size() == 1 &&
+ "Must have one entry for profile generation.");
+ const SampleCounter &SC = SampleCounters.begin()->second;
+ // Fill in function body samples
+ populateBodySamplesForAllFunctions(SC.RangeCounter);
+ // Fill in boundary sample counts as well as call site samples for calls
+ populateBoundarySamplesForAllFunctions(SC.BranchCounter);
+
+ updateTotalSamples();
+}
+
+FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
+ const SampleContextFrameVector &FrameVec, uint64_t Count) {
+ // Get top level profile
+ FunctionSamples *FunctionProfile =
+ &getTopLevelFunctionProfile(FrameVec[0].FuncName);
+ FunctionProfile->addTotalSamples(Count);
+
+ for (size_t I = 1; I < FrameVec.size(); I++) {
+ LineLocation Callsite(
+ FrameVec[I - 1].Location.LineOffset,
+ getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
+ FunctionSamplesMap &SamplesMap =
+ FunctionProfile->functionSamplesAt(Callsite);
+ auto Ret =
+ SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
+ if (Ret.second) {
+ SampleContext Context(FrameVec[I].FuncName);
+ Ret.first->second.setContext(Context);
+ }
+ FunctionProfile = &Ret.first->second;
+ FunctionProfile->addTotalSamples(Count);
+ }
+
+ return *FunctionProfile;
+}
+
+RangeSample
+ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
+ RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
+ if (FillZeroForAllFuncs) {
+ for (auto &FuncI : Binary->getAllBinaryFunctions()) {
+ for (auto &R : FuncI.second.Ranges) {
+ Ranges[{R.first, R.second - 1}] += 0;
+ }
+ }
+ } else {
+ // For each range, we search for all ranges of the function it belongs to
+ // and initialize them with zero count, so they remain zero if they don't
+ // hit any samples. This is to be consistent with the compiler, which
+ // interprets a zero count as unexecuted (cold).
+ for (const auto &I : RangeCounter) {
+ uint64_t StartOffset = I.first.first;
+ for (const auto &Range : Binary->getRangesForOffset(StartOffset))
+ Ranges[{Range.first, Range.second - 1}] += 0;
+ }
+ }
+ RangeSample DisjointRanges;
+ findDisjointRanges(DisjointRanges, Ranges);
+ return DisjointRanges;
+}
+
+void ProfileGenerator::populateBodySamplesForAllFunctions(
+ const RangeSample &RangeCounter) {
+ for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
+ uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
+ uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
+ uint64_t Count = Range.second;
+
+ InstructionPointer IP(Binary, RangeBegin, true);
+ // A disjoint range may fall in the middle of two instructions,
+ // e.g. if Instr1 is at Addr1 and Instr2 at Addr2, a disjoint range
+ // can be Addr1+1 to Addr2-1. We should ignore such ranges.
+ if (IP.Address > RangeEnd)
+ continue;
+
+ do {
+ uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
+ const SampleContextFrameVector &FrameVec =
+ Binary->getFrameLocationStack(Offset);
+ if (!FrameVec.empty()) {
+ // FIXME: As accumulating total count per instruction caused some
+ // regression, we changed to accumulate total count per byte as a
+ // workaround. Tuning hotness threshold on the compiler side might be
+ // necessary in the future.
+ FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
+ FrameVec, Count * Binary->getInstSize(Offset));
+ updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
+ Count);
+ }
+ } while (IP.advance() && IP.Address <= RangeEnd);
+ }
+}
+
+StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
+ // Get the function range by branch target if it's a call branch.
+ auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
+
+ // We won't accumulate sample counts for a range whose start is not the
+ // real function entry, such as an outlined function or inner labels.
+ if (!FRange || !FRange->IsFuncEntry)
+ return StringRef();
+
+ return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
+}
+
+void ProfileGenerator::populateBoundarySamplesForAllFunctions(
+ const BranchSample &BranchCounters) {
+ for (const auto &Entry : BranchCounters) {
+ uint64_t SourceOffset = Entry.first.first;
+ uint64_t TargetOffset = Entry.first.second;
+ uint64_t Count = Entry.second;
+ assert(Count != 0 && "Unexpected zero weight branch");
+
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
+ if (CalleeName.size() == 0)
+ continue;
+ // Record called target sample and its count.
+ const SampleContextFrameVector &FrameVec =
+ Binary->getFrameLocationStack(SourceOffset);
+ if (!FrameVec.empty()) {
+ FunctionSamples &FunctionProfile =
+ getLeafProfileAndAddTotalSamples(FrameVec, 0);
+ FunctionProfile.addCalledTargetSamples(
+ FrameVec.back().Location.LineOffset,
+ getBaseDiscriminator(FrameVec.back().Location.Discriminator),
+ CalleeName, Count);
+ }
+ // Add head samples for callee.
+ FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
+ CalleeProfile.addHeadSamples(Count);
+ }
+}
+
+void ProfileGeneratorBase::calculateAndShowDensity(
+ const SampleProfileMap &Profiles) {
+ double Density = calculateDensity(Profiles, HotCountThreshold);
+ showDensitySuggestion(Density);
+}
+
+FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
+ const SampleContextFrameVector &Context, bool WasLeafInlined) {
+ auto I = ProfileMap.find(SampleContext(Context));
+ if (I == ProfileMap.end()) {
+ // Save the new context for future references.
+ SampleContextFrames NewContext = *Contexts.insert(Context).first;
+ SampleContext FContext(NewContext, RawContext);
+ auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
+ if (WasLeafInlined)
+ FContext.setAttribute(ContextWasInlined);
+ FunctionSamples &FProfile = Ret.first->second;
+ FProfile.setContext(FContext);
+ return Ret.first->second;
+ }
+ return I->second;
+}
+
+void CSProfileGenerator::generateProfile() {
+ FunctionSamples::ProfileIsCSFlat = true;
+
+ if (Binary->getTrackFuncContextSize())
+ computeSizeForProfiledFunctions();
+
+ if (Binary->usePseudoProbes()) {
+ // Enable pseudo probe functionalities in SampleProf
+ FunctionSamples::ProfileIsProbeBased = true;
+ generateProbeBasedProfile();
+ } else {
+ generateLineNumBasedProfile();
+ }
+ postProcessProfiles();
+}
+
+void CSProfileGenerator::computeSizeForProfiledFunctions() {
+ // Hash map to deduplicate function ranges; each item is a pair of
+ // function start and end offsets.
+ std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
+ // Go through all the ranges in the CS counters, and use the start of each
+ // range to look up the function it belongs to and record the function range.
+ for (const auto &CI : SampleCounters) {
+ for (const auto &Item : CI.second.RangeCounter) {
+ // FIXME: Filter the bogus crossing function range.
+ uint64_t StartOffset = Item.first.first;
+ // Note that a function can be split into multiple ranges, so get all
+ // ranges of the function.
+ for (const auto &Range : Binary->getRangesForOffset(StartOffset))
+ AggregatedRanges[Range.first] = Range.second;
+ }
+ }
+
+ for (const auto &I : AggregatedRanges) {
+ uint64_t StartOffset = I.first;
+ uint64_t EndOffset = I.second;
+ Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
+ }
+}
+
+void CSProfileGenerator::generateLineNumBasedProfile() {
+ for (const auto &CI : SampleCounters) {
+ const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
+
+ // Get or create function profile for the range
+ FunctionSamples &FunctionProfile =
+ getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
+
+ // Fill in function body samples
+ populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
+ // Fill in boundary sample counts as well as call site samples for calls
+ populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
+ CI.second.BranchCounter);
+ }
+ // Fill in call site value sample for inlined calls and also use context to
+ // infer missing samples. Since we don't have call count for inlined
+ // functions, we estimate it from inlinee's profile using the entry of the
+ // body sample.
+ populateInferredFunctionSamples();
+
+ updateTotalSamples();
+}
+
+void CSProfileGenerator::populateBodySamplesForFunction(
+ FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
+ // Compute disjoint ranges first, so we can use MAX
+ // for calculating count for each location.
+ RangeSample Ranges;
+ findDisjointRanges(Ranges, RangeCounter);
+ for (const auto &Range : Ranges) {
+ uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
+ uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
+ uint64_t Count = Range.second;
+ // Disjoint ranges may introduce zero-filled gaps that don't belong to the
+ // current context; filter them out.
+ if (Count == 0)
+ continue;
+
+ InstructionPointer IP(Binary, RangeBegin, true);
+ // A disjoint range may fall in the middle of two instructions,
+ // e.g. if Instr1 is at Addr1 and Instr2 at Addr2, a disjoint range
+ // can be Addr1+1 to Addr2-1. We should ignore such ranges.
+ if (IP.Address > RangeEnd)
+ continue;
+
+ do {
+ uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
+ auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
+ if (LeafLoc.hasValue()) {
+ // Recording body sample for this specific context
+ updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
+ FunctionProfile.addTotalSamples(Count);
+ }
+ } while (IP.advance() && IP.Address <= RangeEnd);
+ }
+}
+
+void CSProfileGenerator::populateBoundarySamplesForFunction(
+ SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
+ const BranchSample &BranchCounters) {
+
+ for (const auto &Entry : BranchCounters) {
+ uint64_t SourceOffset = Entry.first.first;
+ uint64_t TargetOffset = Entry.first.second;
+ uint64_t Count = Entry.second;
+ assert(Count != 0 && "Unexpected zero weight branch");
+
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
+ if (CalleeName.size() == 0)
+ continue;
+
+ // Record called target sample and its count
+ auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
+ if (!LeafLoc.hasValue())
+ continue;
+ FunctionProfile.addCalledTargetSamples(
+ LeafLoc->Location.LineOffset,
+ getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
+ Count);
+
+ // Record head sample for called target(callee)
+ SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
+ assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
+ "Leaf function name doesn't match");
+ CalleeCtx.back() = *LeafLoc;
+ CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
+ FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
+ CalleeProfile.addHeadSamples(Count);
+ }
+}
+
+static SampleContextFrame
+getCallerContext(SampleContextFrames CalleeContext,
+ SampleContextFrameVector &CallerContext) {
+ assert(CalleeContext.size() > 1 && "Unexpected empty context");
+ CalleeContext = CalleeContext.drop_back();
+ CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
+ SampleContextFrame CallerFrame = CallerContext.back();
+ CallerContext.back().Location = LineLocation(0, 0);
+ return CallerFrame;
+}
+
+void CSProfileGenerator::populateInferredFunctionSamples() {
+ for (const auto &Item : ProfileMap) {
+ const auto &CalleeContext = Item.first;
+ const FunctionSamples &CalleeProfile = Item.second;
+
+ // If we already have head sample counts, we must have value profile
+ // for call sites added already. Skip to avoid double counting.
+ if (CalleeProfile.getHeadSamples())
+ continue;
+ // If we don't have a context, there is nothing to do for the caller's
+ // call site. This can happen for an entry point function.
+ if (CalleeContext.isBaseContext())
+ continue;
+
+ // Infer the caller's frame location and context ID by splitting the callee's context
+ SampleContextFrameVector CallerContextId;
+ SampleContextFrame &&CallerLeafFrameLoc =
+ getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
+ SampleContextFrames CallerContext(CallerContextId);
+
+ // It's possible that we haven't seen any sample directly in the caller,
+ // in which case CallerProfile will not exist. But we can't modify
+ // ProfileMap while iterating it.
+ // TODO: create function profiles for those callers too
+ if (ProfileMap.find(CallerContext) == ProfileMap.end())
+ continue;
+ FunctionSamples &CallerProfile = ProfileMap[CallerContext];
+
+ // Since we don't have the call count for inlined functions, we estimate
+ // it from the inlinee's profile using its entry body samples.
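+ // Illustrative example: for a callee context "main:1 @ foo" whose entry
+ // count is 100, this attributes 100 samples to the call site main:1 in
+ // the caller's profile.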
+ uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
+ // If the callee has no entry samples and no body samples at all, use 1
+ // to indicate that it is live.
+ if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
+ EstimatedCallCount = 1;
+ CallerProfile.addCalledTargetSamples(
+ CallerLeafFrameLoc.Location.LineOffset,
+ CallerLeafFrameLoc.Location.Discriminator,
+ CalleeProfile.getContext().getName(), EstimatedCallCount);
+ CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
+ CallerLeafFrameLoc.Location.Discriminator,
+ EstimatedCallCount);
+ CallerProfile.addTotalSamples(EstimatedCallCount);
+ }
+}
+
+void CSProfileGenerator::postProcessProfiles() {
+ // Compute hot/cold threshold based on profile. This will be used for cold
+ // context profile merging/trimming.
+ computeSummaryAndThreshold();
+
+ // Run global pre-inliner to adjust/merge context profile based on estimated
+ // inline decisions.
+ if (EnableCSPreInliner) {
+ CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
+ .run();
+ // Turn off the profile merger by default unless it is explicitly enabled.
+ if (!CSProfMergeColdContext.getNumOccurrences())
+ CSProfMergeColdContext = false;
+ }
+
+ // Trim and merge cold context profile using cold threshold above.
+ if (TrimColdProfile || CSProfMergeColdContext) {
+ SampleContextTrimmer(ProfileMap)
+ .trimAndMergeColdContextProfiles(
+ HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
+ CSProfMaxColdContextDepth, EnableCSPreInliner);
+ }
+
+ // Merge function samples of CS profile to calculate profile density.
+ sampleprof::SampleProfileMap ContextLessProfiles;
+ for (const auto &I : ProfileMap) {
+ ContextLessProfiles[I.second.getName()].merge(I.second);
+ }
+
+ calculateAndShowDensity(ContextLessProfiles);
+ if (GenCSNestedProfile) {
+ CSProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertProfiles();
+ FunctionSamples::ProfileIsCSFlat = false;
+ FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
+ }
+}
+
+void ProfileGeneratorBase::computeSummaryAndThreshold() {
+ SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
+ auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
+ HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
+ (Summary->getDetailedSummary()));
+ ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
+ (Summary->getDetailedSummary()));
+}
+
+// Helper function to extract the context prefix frame stack. The extracted
+// stack is meant for reuse; the leaf context stack will be appended and
+// compressed while looking up the function profile.
+static void extractPrefixContextStack(
+ SampleContextFrameVector &ContextStack,
+ const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
+ ProfiledBinary *Binary) {
+ for (const auto *P : Probes) {
+ Binary->getInlineContextForProbe(P, ContextStack, true);
+ }
+}
+
+void CSProfileGenerator::generateProbeBasedProfile() {
+ for (const auto &CI : SampleCounters) {
+ const ProbeBasedCtxKey *CtxKey =
+ dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
+ SampleContextFrameVector ContextStack;
+ extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
+ // Fill in function body samples from probes, also infer caller's samples
+ // from callee's probe
+ populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
+ // Fill in boundary samples for a call probe
+ populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
+ }
+}
+
+void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
+ ProbeCounterMap &ProbeCounter) {
+ RangeSample Ranges;
+ findDisjointRanges(Ranges, RangeCounter);
+ for (const auto &Range : Ranges) {
+ uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
+ uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
+ uint64_t Count = Range.second;
+ // Disjoint ranges can introduce zero-filled gaps that
+ // don't belong to the current context; filter them out.
+ if (Count == 0)
+ continue;
+
+ InstructionPointer IP(Binary, RangeBegin, true);
+ // A disjoint range may fall between two instructions, e.g. if Instr1 is
+ // at Addr1 and Instr2 at Addr2, a disjoint range can be Addr1+1 to
+ // Addr2-1. Such ranges should be ignored.
+ if (IP.Address > RangeEnd)
+ continue;
+
+ do {
+ const AddressProbesMap &Address2ProbesMap =
+ Binary->getAddress2ProbesMap();
+ auto It = Address2ProbesMap.find(IP.Address);
+ if (It != Address2ProbesMap.end()) {
+ for (const auto &Probe : It->second) {
+ if (!Probe.isBlock())
+ continue;
+ ProbeCounter[&Probe] += Count;
+ }
+ }
+ } while (IP.advance() && IP.Address <= RangeEnd);
+ }
+}
+
+void CSProfileGenerator::populateBodySamplesWithProbes(
+ const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
+ ProbeCounterMap ProbeCounter;
+ // Extract the top frame probes by looking up each address in the range
+ // in the Address2ProbeMap
+ extractProbesFromRange(RangeCounter, ProbeCounter);
+ std::unordered_map<MCDecodedPseudoProbeInlineTree *,
+ std::unordered_set<FunctionSamples *>>
+ FrameSamples;
+ for (const auto &PI : ProbeCounter) {
+ const MCDecodedPseudoProbe *Probe = PI.first;
+ uint64_t Count = PI.second;
+ FunctionSamples &FunctionProfile =
+ getFunctionProfileForLeafProbe(ContextStack, Probe);
+ // Record the current frame and FunctionProfile whenever samples are
+ // collected for non-dangling probes. This is for reporting all of the
+ // zero-count probes of the frame later.
+ FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
+ FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+ FunctionProfile.addTotalSamples(Count);
+ if (Probe->isEntry()) {
+ FunctionProfile.addHeadSamples(Count);
+ // Look up for the caller's function profile
+ const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
+ SampleContextFrames CalleeContextId =
+ FunctionProfile.getContext().getContextFrames();
+ if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
+ // Since the context id will be compressed, we have to use callee's
+ // context id to infer caller's context id to ensure they share the
+ // same context prefix.
+ SampleContextFrameVector CallerContextId;
+ SampleContextFrame &&CallerLeafFrameLoc =
+ getCallerContext(CalleeContextId, CallerContextId);
+ uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
+ assert(CallerIndex &&
+ "Inferred caller's location index shouldn't be zero!");
+ FunctionSamples &CallerProfile =
+ getFunctionProfileForContext(CallerContextId);
+ CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
+ CallerProfile.addBodySamples(CallerIndex, 0, Count);
+ CallerProfile.addTotalSamples(Count);
+ CallerProfile.addCalledTargetSamples(
+ CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
+ }
+ }
+ }
+
+ // Assign zero count for remaining probes without sample hits to
+ // differentiate from probes optimized away, of which the counts are unknown
+ // and will be inferred by the compiler.
+ for (auto &I : FrameSamples) {
+ for (auto *FunctionProfile : I.second) {
+ for (auto *Probe : I.first->getProbes()) {
+ FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
+ }
+ }
+ }
+}
+
+void CSProfileGenerator::populateBoundarySamplesWithProbes(
+ const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
+ for (const auto &BI : BranchCounter) {
+ uint64_t SourceOffset = BI.first.first;
+ uint64_t TargetOffset = BI.first.second;
+ uint64_t Count = BI.second;
+ uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
+ const MCDecodedPseudoProbe *CallProbe =
+ Binary->getCallProbeForAddr(SourceAddress);
+ if (CallProbe == nullptr)
+ continue;
+ FunctionSamples &FunctionProfile =
+ getFunctionProfileForLeafProbe(ContextStack, CallProbe);
+ FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
+ FunctionProfile.addTotalSamples(Count);
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
+ if (CalleeName.size() == 0)
+ continue;
+ FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
+ Count);
+ }
+}
+
+FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
+ SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
+
+ // Explicitly copy the context for appending the leaf context
+ SampleContextFrameVector NewContextStack(ContextStack.begin(),
+ ContextStack.end());
+ Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
+ // For a leaf inlined context with the top frame, strip off the top
+ // frame's probe id, e.g. for the inlined stack [foo:1, bar:2] the
+ // ContextId will be "foo:1 @ bar".
+ auto LeafFrame = NewContextStack.back();
+ LeafFrame.Location = LineLocation(0, 0);
+ NewContextStack.pop_back();
+ // Compress the context string except for the leaf frame
+ CSProfileGenerator::compressRecursionContext(NewContextStack);
+ CSProfileGenerator::trimContext(NewContextStack);
+ NewContextStack.push_back(LeafFrame);
+
+ const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
+ bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
+ FunctionSamples &FunctionProfile =
+ getFunctionProfileForContext(NewContextStack, WasLeafInlined);
+ FunctionProfile.setFunctionHash(FuncDesc->FuncHash);
+ return FunctionProfile;
+}
+
+} // end namespace sampleprof
+} // end namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.h b/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.h
new file mode 100644
index 0000000000..af349ac991
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ProfileGenerator.h
@@ -0,0 +1,312 @@
+//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEGENERATOR_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEGENERATOR_H
+#include "CSPreInliner.h"
+#include "ErrorHandling.h"
+#include "PerfReader.h"
+#include "ProfiledBinary.h"
+#include "llvm/ProfileData/SampleProfWriter.h"
+#include <memory>
+#include <unordered_set>
+
+using namespace llvm;
+using namespace sampleprof;
+
+namespace llvm {
+namespace sampleprof {
+
+// This is the base class for profile generation of sample-based PGO. We reuse
+// all structures relating to function profiles and profile writers as seen in
+// /ProfileData/SampleProf.h.
+class ProfileGeneratorBase {
+
+public:
+ ProfileGeneratorBase(ProfiledBinary *Binary,
+ const ContextSampleCounterMap &Counters)
+ : Binary(Binary), SampleCounters(Counters){};
+ virtual ~ProfileGeneratorBase() = default;
+ static std::unique_ptr<ProfileGeneratorBase>
+ create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
+ bool ProfileIsCSFlat);
+ virtual void generateProfile() = 0;
+ void write();
+
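+ // Illustrative note (assuming a discriminator that encodes a duplication
+ // factor, e.g. from loop unrolling or vectorization): with a duplication
+ // factor of 4, a raw sample count of 25 on that line would be scaled by
+ // callers of getDuplicationFactor to 25 * 4 = 100 logical samples.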
+ static uint32_t
+ getDuplicationFactor(unsigned Discriminator,
+ bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
+ return UseFSD ? 1
+ : llvm::DILocation::getDuplicationFactorFromDiscriminator(
+ Discriminator);
+ }
+
+ static uint32_t
+ getBaseDiscriminator(unsigned Discriminator,
+ bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
+ return UseFSD ? Discriminator
+ : DILocation::getBaseDiscriminatorFromDiscriminator(
+ Discriminator, /* IsFSDiscriminator */ false);
+ }
+
+ static bool UseFSDiscriminator;
+
+protected:
+ // Use SampleProfileWriter to serialize profile map
+ void write(std::unique_ptr<SampleProfileWriter> Writer,
+ SampleProfileMap &ProfileMap);
+ /*
+ For each region boundary point, mark if it is begin or end (or both) of
+ the region. Boundary points are inclusive. Log the sample count as well
+ so we can use it when we compute the sample count of each disjoint region
+ later. Note that there might be multiple ranges with different sample
+ count that share same begin/end point. We need to accumulate the sample
+ count for the boundary point for such case, because for the example
+ below,
+
+ |<--100-->|
+ |<------200------>|
+ A B C
+
+ sample count for disjoint region [A,B] would be 300.
+ */
+ void findDisjointRanges(RangeSample &DisjointRanges,
+ const RangeSample &Ranges);
+ // Helper function for updating body sample for a leaf location in
+ // FunctionProfile
+ void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
+ const SampleContextFrame &LeafLoc,
+ uint64_t Count);
+ void updateTotalSamples();
+
+ StringRef getCalleeNameForOffset(uint64_t TargetOffset);
+
+ void computeSummaryAndThreshold();
+
+ void calculateAndShowDensity(const SampleProfileMap &Profiles);
+
+ double calculateDensity(const SampleProfileMap &Profiles,
+ uint64_t HotCntThreshold);
+
+ void showDensitySuggestion(double Density);
+
+ // Thresholds from profile summary to answer isHotCount/isColdCount queries.
+ uint64_t HotCountThreshold;
+
+ uint64_t ColdCountThreshold;
+
+ // Used by SampleProfileWriter
+ SampleProfileMap ProfileMap;
+
+ ProfiledBinary *Binary = nullptr;
+
+ const ContextSampleCounterMap &SampleCounters;
+};
+
+class ProfileGenerator : public ProfileGeneratorBase {
+
+public:
+ ProfileGenerator(ProfiledBinary *Binary,
+ const ContextSampleCounterMap &Counters)
+ : ProfileGeneratorBase(Binary, Counters){};
+ void generateProfile() override;
+
+private:
+ void generateLineNumBasedProfile();
+ RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
+ FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
+ // Helper function to get the leaf frame's FunctionProfile by traversing
+ // the inline stack; it also adds the total samples to each frame's
+ // function profile along the way.
+ FunctionSamples &
+ getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
+ uint64_t Count);
+ void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
+ void
+ populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
+ void postProcessProfiles();
+ void trimColdProfiles(const SampleProfileMap &Profiles,
+ uint64_t ColdCntThreshold);
+};
+
+using ProbeCounterMap =
+ std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
+
+class CSProfileGenerator : public ProfileGeneratorBase {
+public:
+ CSProfileGenerator(ProfiledBinary *Binary,
+ const ContextSampleCounterMap &Counters)
+ : ProfileGeneratorBase(Binary, Counters){};
+
+ void generateProfile() override;
+
+ // Trim the context stack at a given depth.
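+ // For example, trimContext([a, b, c, d], /*Depth=*/2) keeps only the two
+ // innermost frames and yields [c, d].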
+ template <typename T>
+ static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
+ if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
+ return;
+ std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
+ S.begin());
+ S.resize(Depth);
+ }
+
+ // Remove adjacent repeated context sequences up to a given sequence length;
+ // -1 means no size limit. Note that repeated sequences are identified based
+ // on the exact call site, which is finer granularity than function recursion.
+ template <typename T>
+ static void compressRecursionContext(SmallVectorImpl<T> &Context,
+ int32_t CSize = MaxCompressionSize) {
+ uint32_t I = 1;
+ uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
+ uint32_t MaxDedupSize =
+ CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
+ auto BeginIter = Context.begin();
+ // Use an in-place algorithm to save memory copy
+ // End indicates the end location of current iteration's data
+ uint32_t End = 0;
+ // Deduplicate from length 1 to the max possible size of a repeated
+ // sequence.
+ while (I <= MaxDedupSize) {
+ // This is a linear algorithm that deduplicates adjacent repeated
+ // sequences of size I. The deduplication detection runs on a sliding
+ // window whose size is 2*I and it keeps sliding the window to deduplicate
+ // the data inside. Once duplication is detected, deduplicate it by
+ // skipping the right half part of the window, otherwise just copy back
+ // the new one by appending them at the back of End pointer(for the next
+ // iteration).
+ //
+ // For example:
+ // Input: [a1, a2, b1, b2]
+ // (Added index to distinguish the same char, the origin is [a, a, b,
+ // b], the size of the dedup window is 2(I = 1) at the beginning)
+ //
+ // 1) The initial status is a dummy window[null, a1], then just copy the
+ // right half of the window(End = 0), then slide the window.
+ // Result: [a1], a2, b1, b2 (End points to the element right before ],
+ // after ] is the data of the previous iteration)
+ //
+ // 2) The next window is [a1, a2]. Since a1 == a2, duplication is
+ // detected, so skip the right half of the window and only slide the
+ // window.
+ // Result: [a1], a2, b1, b2
+ //
+ // 3) The next window is [a2, b1]; copy the right half of the window
+ // (b1 is new) to the End and slide the window.
+ // Result: [a1, b1], b1, b2
+ //
+ // 4) The next window is [b1, b2]; same as 2), skip b2.
+ // Result: [a1, b1], b1, b2
+ // After resize, it will be [a, b]
+
+ // Use pointers like below to do comparison inside the window
+ // [a b c a b c]
+ // | | | | |
+ // LeftBoundary Left Right Left+I Right+I
+ // A duplication is found if Left < LeftBoundary.
+
+ int32_t Right = I - 1;
+ End = I;
+ int32_t LeftBoundary = 0;
+ while (Right + I < Context.size()) {
+ // To avoid scanning part of a sequence repeatedly, find the common
+ // suffix of the two halves of the window. The common suffix will serve
+ // as the common prefix of the next possible pair of duplicate sequences.
+ // The non-common part will be ignored and never scanned again.
+
+ // For example.
+ // Input: [a, b1], c1, b2, c2
+ // I = 2
+ //
+ // 1) For the window [a, b1, c1, b2], the non-common suffix of the
+ // right half is 'c1'; copy it and only slide the window 1 step.
+ // Result: [a, b1, c1], b2, c2
+ //
+ // 2) The next window is [b1, c1, b2, c2], so duplication happens.
+ // Result after resize: [a, b, c]
+
+ int32_t Left = Right;
+ while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
+ // Find the longest common suffix inside the window. When the loop
+ // stops, Left points at the diverging point in the current sequence.
+ Left--;
+ }
+
+ bool DuplicationFound = (Left < LeftBoundary);
+ // Don't need to recheck the data before Right
+ LeftBoundary = Right + 1;
+ if (DuplicationFound) {
+ // Duplication found, skip right half of the window.
+ Right += I;
+ } else {
+ // Copy the non-common-suffix part of the adjacent sequence.
+ std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
+ BeginIter + End);
+ End += Left + I - Right;
+ // Only slide the window by the size of non-common-suffix
+ Right = Left + I;
+ }
+ }
+ // Don't forget the remaining part that's not scanned.
+ std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
+ End += Context.size() - Right - 1;
+ I++;
+ Context.resize(End);
+ MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
+ }
+ }
+
+private:
+ void generateLineNumBasedProfile();
+ // Lookup or create FunctionSamples for the context
+ FunctionSamples &
+ getFunctionProfileForContext(const SampleContextFrameVector &Context,
+ bool WasLeafInlined = false);
+ // For profiled-only functions, compute their inline-context function byte
+ // size on demand; it is used by the pre-inliner.
+ void computeSizeForProfiledFunctions();
+ // Post-processing for profiles before writing them out, such as merging
+ // and trimming cold profiles, and running the pre-inliner on profiles.
+ void postProcessProfiles();
+
+ void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
+ const RangeSample &RangeCounters);
+ void populateBoundarySamplesForFunction(SampleContextFrames ContextId,
+ FunctionSamples &FunctionProfile,
+ const BranchSample &BranchCounters);
+ void populateInferredFunctionSamples();
+
+ void generateProbeBasedProfile();
+ // Go through each address in the range to extract the top frame probe by
+ // looking it up in the Address2ProbeMap
+ void extractProbesFromRange(const RangeSample &RangeCounter,
+ ProbeCounterMap &ProbeCounter);
+ // Fill in function body samples from probes
+ void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
+ SampleContextFrames ContextStack);
+ // Fill in boundary samples for a call probe
+ void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
+ SampleContextFrames ContextStack);
+ // Helper function to get FunctionSamples for the leaf probe
+ FunctionSamples &
+ getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,
+ const MCDecodedPseudoProbe *LeafProbe);
+
+ // Underlying context table serves for sample profile writer.
+ std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
+
+public:
+ // Deduplicate adjacent repeated context sequences up to a given sequence
+ // length. -1 means no size limit.
+ static int32_t MaxCompressionSize;
+ static int MaxContextDepth;
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.cpp b/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.cpp
new file mode 100644
index 0000000000..a773a3c98d
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.cpp
@@ -0,0 +1,790 @@
+//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfiledBinary.h"
+#include "ErrorHandling.h"
+#include "ProfileGenerator.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/TargetSelect.h"
+
+#define DEBUG_TYPE "load-binary"
+
+using namespace llvm;
+using namespace sampleprof;
+
+cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("Print disassembled code."));
+
+cl::opt<bool> ShowSourceLocations("show-source-locations", cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("Print source locations."));
+
+static cl::opt<bool>
+ ShowCanonicalFnName("show-canonical-fname", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print canonical function name."));
+
+static cl::opt<bool> ShowPseudoProbe(
+ "show-pseudo-probe", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print pseudo probe section and disassembled info."));
+
+static cl::opt<bool> UseDwarfCorrelation(
+ "use-dwarf-correlation", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Use dwarf for profile correlation even when binary contains "
+ "pseudo probe."));
+
+static cl::list<std::string> DisassembleFunctions(
+ "disassemble-functions", cl::CommaSeparated,
+ cl::desc("List of functions to print disassembly for. Accept demangled "
+ "names only. Only work with show-disassembly-only"));
+
+extern cl::opt<bool> ShowDetailedWarning;
+
+namespace llvm {
+namespace sampleprof {
+
+static const Target *getTarget(const ObjectFile *Obj) {
+ Triple TheTriple = Obj->makeTriple();
+ std::string Error;
+ std::string ArchName;
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
+ if (!TheTarget)
+ exitWithError(Error, Obj->getFileName());
+ return TheTarget;
+}
+
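+// Illustrative example: the context [main:1 @ foo:2 @ bar] is inserted
+// leaf-first into the reverse trie as root -> bar -> foo:2 -> main:1, with
+// InstrSize accumulated on the deepest node, so lookups can walk from the
+// leaf outward to the longest matching caller chain.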
+void BinarySizeContextTracker::addInstructionForContext(
+ const SampleContextFrameVector &Context, uint32_t InstrSize) {
+ ContextTrieNode *CurNode = &RootContext;
+ bool IsLeaf = true;
+ for (const auto &Callsite : reverse(Context)) {
+ StringRef CallerName = Callsite.FuncName;
+ LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
+ CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
+ IsLeaf = false;
+ }
+
+ CurNode->addFunctionSize(InstrSize);
+}
+
+uint32_t
+BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
+ ContextTrieNode *CurrNode = &RootContext;
+ ContextTrieNode *PrevNode = nullptr;
+ SampleContextFrames Frames = Context.getContextFrames();
+ int32_t I = Frames.size() - 1;
+ Optional<uint32_t> Size;
+
+ // Start from the top-level context-less function, traverse down the
+ // reverse context trie to find the best/longest match for the given
+ // context, then retrieve the size.
+
+ while (CurrNode && I >= 0) {
+ // Process from leaf function to callers (added to context).
+ const auto &ChildFrame = Frames[I--];
+ PrevNode = CurrNode;
+ CurrNode =
+ CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName);
+ if (CurrNode && CurrNode->getFunctionSize().hasValue())
+ Size = CurrNode->getFunctionSize().getValue();
+ }
+
+ // If we traversed all nodes along the path of the context and haven't
+ // found a size yet, pivot to look for the size from sibling nodes, i.e.
+ // the size of the inlinee under a different context.
+ if (!Size.hasValue()) {
+ if (!CurrNode)
+ CurrNode = PrevNode;
+ while (!Size.hasValue() && CurrNode &&
+ !CurrNode->getAllChildContext().empty()) {
+ CurrNode = &CurrNode->getAllChildContext().begin()->second;
+ if (CurrNode->getFunctionSize().hasValue())
+ Size = CurrNode->getFunctionSize().getValue();
+ }
+ }
+
+ assert(Size.hasValue() && "We should at least find one context size.");
+ return Size.getValue();
+}
+
+void BinarySizeContextTracker::trackInlineesOptimizedAway(
+ MCPseudoProbeDecoder &ProbeDecoder) {
+ ProbeFrameStack ProbeContext;
+ for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
+ trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
+}
+
+void BinarySizeContextTracker::trackInlineesOptimizedAway(
+ MCPseudoProbeDecoder &ProbeDecoder,
+ MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
+ StringRef FuncName =
+ ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
+ ProbeContext.emplace_back(FuncName, 0);
+
+ // This ProbeContext has a probe, so it has code before inlining and
+ // optimization. Make sure we mark its size as known.
+ if (!ProbeNode.getProbes().empty()) {
+ ContextTrieNode *SizeContext = &RootContext;
+ for (auto &ProbeFrame : reverse(ProbeContext)) {
+ StringRef CallerName = ProbeFrame.first;
+ LineLocation CallsiteLoc(ProbeFrame.second, 0);
+ SizeContext =
+ SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
+ }
+ // Add a zero size to mark it as known.
+ SizeContext->addFunctionSize(0);
+ }
+
+ // DFS down the probe inline tree
+ for (const auto &ChildNode : ProbeNode.getChildren()) {
+ InlineSite Location = ChildNode.first;
+ ProbeContext.back().second = std::get<1>(Location);
+ trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
+ }
+
+ ProbeContext.pop_back();
+}
+
+void ProfiledBinary::warnNoFuncEntry() {
+ uint64_t NoFuncEntryNum = 0;
+ for (auto &F : BinaryFunctions) {
+ if (F.second.Ranges.empty())
+ continue;
+ bool hasFuncEntry = false;
+ for (auto &R : F.second.Ranges) {
+ if (FuncRange *FR = findFuncRangeForStartOffset(R.first)) {
+ if (FR->IsFuncEntry) {
+ hasFuncEntry = true;
+ break;
+ }
+ }
+ }
+
+ if (!hasFuncEntry) {
+ NoFuncEntryNum++;
+ if (ShowDetailedWarning)
+ WithColor::warning()
+ << "Failed to determine function entry for " << F.first
+ << " due to inconsistent name from symbol table and dwarf info.\n";
+ }
+ }
+ emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
+ "of functions failed to determine function entry due to "
+ "inconsistent name from symbol table and dwarf info.");
+}
+
+void ProfiledBinary::load() {
+ // Attempt to open the binary.
+ OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
+ Binary &ExeBinary = *OBinary.getBinary();
+
+ auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
+ if (!Obj)
+ exitWithError("not a valid Elf image", Path);
+
+ TheTriple = Obj->makeTriple();
+ // Currently only X86 is supported
+ if (!TheTriple.isX86())
+ exitWithError("unsupported target", TheTriple.getTriple());
+ LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
+
+ // Find the preferred load address for text sections.
+ setPreferredTextSegmentAddresses(Obj);
+
+ // Decode pseudo probe related section
+ decodePseudoProbe(Obj);
+
+ // Load debug info for subprograms from the DWARF section.
+ // If the path of a debug info binary is specified, use the debug info from
+ // it; otherwise use the debug info from the executable binary.
+ if (!DebugBinaryPath.empty()) {
+ OwningBinary<Binary> DebugPath =
+ unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
+ loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(DebugPath.getBinary()));
+ } else {
+ loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(&ExeBinary));
+ }
+
+ // Disassemble the text sections.
+ disassemble(Obj);
+
+ // Track size for optimized inlinees when probe is available
+ if (UsePseudoProbes && TrackFuncContextSize)
+ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
+
+ // Use function start and return address to infer prolog and epilog
+ ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
+ ProEpilogTracker.inferEpilogOffsets(RetOffsets);
+
+ warnNoFuncEntry();
+
+ // TODO: decode other sections.
+}
+
+bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
+ uint64_t Offset1 = virtualAddrToOffset(Address1);
+ uint64_t Offset2 = virtualAddrToOffset(Address2);
+ const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
+ const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2);
+ if (Context1.size() != Context2.size())
+ return false;
+ if (Context1.empty())
+ return false;
+ // The leaf frame contains the location within the leaf, which needs to
+ // be removed as it's not part of the calling context
+ return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
+ Context2.begin(), Context2.begin() + Context2.size() - 1);
+}
+
+SampleContextFrameVector
+ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
+ bool &WasLeafInlined) {
+ SampleContextFrameVector ContextVec;
+ // Process from frame root to leaf
+ for (auto Address : Stack) {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ const SampleContextFrameVector &ExpandedContext =
+ getFrameLocationStack(Offset);
+ // An instruction without a valid debug line will be ignored by sample
+ // processing
+ if (ExpandedContext.empty())
+ return SampleContextFrameVector();
+ // Set WasLeafInlined based on the inlined frame count of the last
+ // address, which is the leaf
+ WasLeafInlined = (ExpandedContext.size() > 1);
+ ContextVec.append(ExpandedContext);
+ }
+
+ // Replace with decoded base discriminator
+ for (auto &Frame : ContextVec) {
+ Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
+ Frame.Location.Discriminator, UseFSDiscriminator);
+ }
+
+ assert(ContextVec.size() && "Context length should be at least 1");
+
+ // Compress the context string except for the leaf frame
+ auto LeafFrame = ContextVec.back();
+ LeafFrame.Location = LineLocation(0, 0);
+ ContextVec.pop_back();
+ CSProfileGenerator::compressRecursionContext(ContextVec);
+ CSProfileGenerator::trimContext(ContextVec);
+ ContextVec.push_back(LeafFrame);
+ return ContextVec;
+}
+
+template <class ELFT>
+void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
+ const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
+ // FIXME: This should be the page size of the system running profiling.
+ // However such info isn't available at post-processing time, assuming
+ // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
+ // because we may build the tools on non-linux.
+ uint32_t PageSize = 0x1000;
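+ // e.g. with a 4K page, p_vaddr 0x401234 is aligned down to 0x401000
+ // (0x401234 & ~0xFFF).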
+ for (const typename ELFT::Phdr &Phdr : PhdrRange) {
+ if (Phdr.p_type == ELF::PT_LOAD) {
+ if (!FirstLoadableAddress)
+ FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
+ if (Phdr.p_flags & ELF::PF_X) {
+ // Segments will always be loaded at a page boundary.
+ PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
+ ~(PageSize - 1U));
+ TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
+ }
+ }
+ }
+
+ if (PreferredTextSegmentAddresses.empty())
+ exitWithError("no executable segment found", FileName);
+}
+
+void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
+ setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
+ else
+ llvm_unreachable("invalid ELF object format");
+}
+
+void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
+ if (UseDwarfCorrelation)
+ return;
+
+ StringRef FileName = Obj->getFileName();
+ for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
+ SI != SE; ++SI) {
+ const SectionRef &Section = *SI;
+ StringRef SectionName = unwrapOrError(Section.getName(), FileName);
+
+ if (SectionName == ".pseudo_probe_desc") {
+ StringRef Contents = unwrapOrError(Section.getContents(), FileName);
+ if (!ProbeDecoder.buildGUID2FuncDescMap(
+ reinterpret_cast<const uint8_t *>(Contents.data()),
+ Contents.size()))
+ exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
+ } else if (SectionName == ".pseudo_probe") {
+ StringRef Contents = unwrapOrError(Section.getContents(), FileName);
+ if (!ProbeDecoder.buildAddress2ProbeMap(
+ reinterpret_cast<const uint8_t *>(Contents.data()),
+ Contents.size()))
+ exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
+ // set UsePseudoProbes flag, used for PerfReader
+ UsePseudoProbes = true;
+ }
+ }
+
+ if (ShowPseudoProbe)
+ ProbeDecoder.printGUID2FuncDescMap(outs());
+}
+
+void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
+ // Note that the start offset of each ELF section can be a non-function
+ // symbol, we need to binary search for the start of a real function range.
+ auto *FuncRange = findFuncRangeForOffset(Offset);
+ // Skip external function symbol.
+ if (!FuncRange)
+ return;
+
+ // Set IsFuncEntry to true if there is only one range in the function or
+ // the RangeSymName from ELF is equal to its DWARF-based function name.
+ if (FuncRange->Func->Ranges.size() == 1 ||
+ (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
+ FuncRange->IsFuncEntry = true;
+}
+
+bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
+ SectionSymbolsTy &Symbols,
+ const SectionRef &Section) {
+ std::size_t SE = Symbols.size();
+ uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
+ uint64_t SectSize = Section.getSize();
+ uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
+ uint64_t NextStartOffset =
+ (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
+ : SectionOffset + SectSize;
+ setIsFuncEntry(StartOffset,
+ FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
+
+ StringRef SymbolName =
+ ShowCanonicalFnName
+ ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
+ : Symbols[SI].Name;
+ bool ShowDisassembly =
+ ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
+ DisassembleFunctionSet.count(SymbolName));
+ if (ShowDisassembly)
+ outs() << '<' << SymbolName << ">:\n";
+
+ auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
+ WithColor::warning() << "Invalid instructions at "
+ << format("%8" PRIx64, Start) << " - "
+ << format("%8" PRIx64, End) << "\n";
+ };
+
+ uint64_t Offset = StartOffset;
+ // Length of the consecutive range of invalid instructions ending right
+ // before the current Offset.
+ uint64_t InvalidInstLength = 0;
+ while (Offset < NextStartOffset) {
+ MCInst Inst;
+ uint64_t Size;
+ // Disassemble an instruction.
+ bool Disassembled =
+ DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
+ Offset + getPreferredBaseAddress(), nulls());
+ if (Size == 0)
+ Size = 1;
+
+ if (ShowDisassembly) {
+ if (ShowPseudoProbe) {
+ ProbeDecoder.printProbeForAddress(outs(),
+ Offset + getPreferredBaseAddress());
+ }
+ outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress());
+ size_t Start = outs().tell();
+ if (Disassembled)
+ IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
+ else
+ outs() << "\t<unknown>";
+ if (ShowSourceLocations) {
+ unsigned Cur = outs().tell() - Start;
+ if (Cur < 40)
+ outs().indent(40 - Cur);
+ InstructionPointer IP(this, Offset);
+ outs() << getReversedLocWithContext(
+ symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
+ }
+ outs() << "\n";
+ }
+
+ if (Disassembled) {
+ const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
+
+ // Record instruction size.
+ Offset2InstSizeMap[Offset] = Size;
+
+ // Populate address maps.
+ CodeAddrOffsets.push_back(Offset);
+ if (MCDesc.isCall())
+ CallOffsets.insert(Offset);
+ else if (MCDesc.isReturn())
+ RetOffsets.insert(Offset);
+ else if (MCDesc.isBranch())
+ BranchOffsets.insert(Offset);
+
+ if (InvalidInstLength) {
+ WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
+ InvalidInstLength = 0;
+ }
+ } else {
+ InvalidInstLength += Size;
+ }
+
+ Offset += Size;
+ }
+
+ if (InvalidInstLength)
+ WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
+
+ if (ShowDisassembly)
+ outs() << "\n";
+
+ return true;
+}
+
+void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
+ const Target *TheTarget = getTarget(Obj);
+ std::string TripleName = TheTriple.getTriple();
+ StringRef FileName = Obj->getFileName();
+
+ MRI.reset(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI)
+ exitWithError("no register info for target " + TripleName, FileName);
+
+ MCTargetOptions MCOptions;
+ AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ if (!AsmInfo)
+ exitWithError("no assembly info for target " + TripleName, FileName);
+
+ SubtargetFeatures Features = Obj->getFeatures();
+ STI.reset(
+ TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
+ if (!STI)
+ exitWithError("no subtarget info for target " + TripleName, FileName);
+
+ MII.reset(TheTarget->createMCInstrInfo());
+ if (!MII)
+ exitWithError("no instruction info for target " + TripleName, FileName);
+
+ MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
+ std::unique_ptr<MCObjectFileInfo> MOFI(
+ TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
+ Ctx.setObjectFileInfo(MOFI.get());
+ DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
+ if (!DisAsm)
+ exitWithError("no disassembler for target " + TripleName, FileName);
+
+ MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
+
+ int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+ IPrinter.reset(TheTarget->createMCInstPrinter(
+ Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
+ IPrinter->setPrintBranchImmAsAddress(true);
+}
+
+void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
+ // Set up disassembler and related components.
+ setUpDisassembler(Obj);
+
+ // Create a mapping from virtual address to symbol name. The symbols in
+ // text sections are the candidates to disassemble.
+ std::map<SectionRef, SectionSymbolsTy> AllSymbols;
+ StringRef FileName = Obj->getFileName();
+ for (const SymbolRef &Symbol : Obj->symbols()) {
+ const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+ const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+ section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
+ if (SecI != Obj->section_end())
+ AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
+ }
+
+ // Sort all the symbols. Use a stable sort to stabilize the output.
+ for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
+ stable_sort(SecSyms.second);
+
+ DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
+ DisassembleFunctions.end());
+ assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
+ "Functions to disassemble should be only specified together with "
+ "--show-disassembly-only");
+
+ if (ShowDisassemblyOnly)
+ outs() << "\nDisassembly of " << FileName << ":\n";
+
+ // Disassemble a text section.
+ for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
+ SI != SE; ++SI) {
+ const SectionRef &Section = *SI;
+ if (!Section.isText())
+ continue;
+
+ uint64_t ImageLoadAddr = getPreferredBaseAddress();
+ uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
+ uint64_t SectSize = Section.getSize();
+ if (!SectSize)
+ continue;
+
+ // Register the text section.
+ TextSections.insert({SectionOffset, SectSize});
+
+ StringRef SectionName = unwrapOrError(Section.getName(), FileName);
+
+ if (ShowDisassemblyOnly) {
+ outs() << "\nDisassembly of section " << SectionName;
+ outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
+ << format("0x%" PRIx64, Section.getAddress() + SectSize)
+ << "]:\n\n";
+ }
+
+ if (SectionName == ".plt")
+ continue;
+
+ // Get the section data.
+ ArrayRef<uint8_t> Bytes =
+ arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
+
+ // Get the list of all the symbols in this section.
+ SectionSymbolsTy &Symbols = AllSymbols[Section];
+
+ // Disassemble symbol by symbol.
+ for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
+ if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
+ exitWithError("disassembling error", FileName);
+ }
+ }
+
+ // Scan the rodata section to check if the FS discriminator symbol exists.
+ checkUseFSDiscriminator(Obj, AllSymbols);
+}
+
+void ProfiledBinary::checkUseFSDiscriminator(
+ const ELFObjectFileBase *Obj,
+ std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
+ const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
+ for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
+ SI != SE; ++SI) {
+ const SectionRef &Section = *SI;
+ if (!Section.isData() || Section.getSize() == 0)
+ continue;
+ SectionSymbolsTy &Symbols = AllSymbols[Section];
+
+ for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
+ if (Symbols[SI].Name == FSDiscriminatorVar) {
+ UseFSDiscriminator = true;
+ return;
+ }
+ }
+ }
+}
+
+void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
+ auto DebugContext = llvm::DWARFContext::create(Obj);
+ if (!DebugContext)
+ exitWithError("Misssing debug info.", Path);
+
+ for (const auto &CompilationUnit : DebugContext->compile_units()) {
+ for (const auto &DieInfo : CompilationUnit->dies()) {
+ llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo);
+
+ if (!Die.isSubprogramDIE())
+ continue;
+ auto Name = Die.getName(llvm::DINameKind::LinkageName);
+ if (!Name)
+ Name = Die.getName(llvm::DINameKind::ShortName);
+ if (!Name)
+ continue;
+
+ auto RangesOrError = Die.getAddressRanges();
+ if (!RangesOrError)
+ continue;
+ const DWARFAddressRangesVector &Ranges = RangesOrError.get();
+
+ if (Ranges.empty())
+ continue;
+
+ // Different DWARF symbols can have the same function name; search for or
+ // create the BinaryFunction indexed by that name.
+ auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
+ auto &Func = Ret.first->second;
+ if (Ret.second)
+ Func.FuncName = Ret.first->first;
+
+ for (const auto &Range : Ranges) {
+ uint64_t FuncStart = Range.LowPC;
+ uint64_t FuncSize = Range.HighPC - FuncStart;
+
+ if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
+ continue;
+
+ uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
+ uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();
+
+ // We may want to know all ranges for one function. Here group the
+ // ranges and store them into BinaryFunction.
+ Func.Ranges.emplace_back(StartOffset, EndOffset);
+
+ auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
+ if (R.second) {
+ FuncRange &FRange = R.first->second;
+ FRange.Func = &Func;
+ FRange.StartOffset = StartOffset;
+ FRange.EndOffset = EndOffset;
+ } else {
+ WithColor::warning()
+ << "Duplicated symbol start address at "
+ << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
+ << " " << R.first->second.getFuncName() << " and " << Name
+ << "\n";
+ }
+ }
+ }
+ }
+ assert(!StartOffset2FuncRangeMap.empty() && "Missing debug info.");
+}
+
+void ProfiledBinary::populateSymbolListFromDWARF(
+ ProfileSymbolList &SymbolList) {
+ for (auto &I : StartOffset2FuncRangeMap)
+ SymbolList.add(I.second.getFuncName());
+}
+
+void ProfiledBinary::setupSymbolizer() {
+ symbolize::LLVMSymbolizer::Options SymbolizerOpts;
+ SymbolizerOpts.PrintFunctions =
+ DILineInfoSpecifier::FunctionNameKind::LinkageName;
+ SymbolizerOpts.Demangle = false;
+ SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
+ SymbolizerOpts.UseSymbolTable = false;
+ SymbolizerOpts.RelativeAddresses = false;
+ Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
+}
+
+SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
+ bool UseCanonicalFnName,
+ bool UseProbeDiscriminator) {
+ assert(this == IP.Binary &&
+ "Binary should only symbolize its own instruction");
+ auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
+ object::SectionedAddress::UndefSection};
+ DIInliningInfo InlineStack = unwrapOrError(
+ Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
+ SymbolizerPath);
+
+ SampleContextFrameVector CallStack;
+ for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
+ const auto &CallerFrame = InlineStack.getFrame(I);
+ if (CallerFrame.FunctionName == "<invalid>")
+ break;
+
+ StringRef FunctionName(CallerFrame.FunctionName);
+ if (UseCanonicalFnName)
+ FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
+
+ uint32_t Discriminator = CallerFrame.Discriminator;
+ uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
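+ // e.g. an inline frame at source line 120 of a function whose definition
+ // starts at line 100 gets LineOffset 20; line offsets are function-relative.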
+ if (UseProbeDiscriminator) {
+ LineOffset =
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+ Discriminator = 0;
+ }
+
+ LineLocation Line(LineOffset, Discriminator);
+ auto It = NameStrings.insert(FunctionName.str());
+ CallStack.emplace_back(*It.first, Line);
+ }
+
+ return CallStack;
+}
+
+void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
+ uint64_t EndOffset) {
+ uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
+ uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
+ InstructionPointer IP(this, RangeBegin, true);
+
+ if (IP.Address != RangeBegin)
+ WithColor::warning() << "Invalid start instruction at "
+ << format("%8" PRIx64, RangeBegin) << "\n";
+
+ if (IP.Address >= RangeEnd)
+ return;
+
+ do {
+ uint64_t Offset = virtualAddrToOffset(IP.Address);
+ const SampleContextFrameVector &SymbolizedCallStack =
+ getFrameLocationStack(Offset, UsePseudoProbes);
+ uint64_t Size = Offset2InstSizeMap[Offset];
+
+ // Record instruction size for the corresponding context
+ FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
+
+ } while (IP.advance() && IP.Address < RangeEnd);
+}
+
+InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
+ uint64_t Address, bool RoundToNext)
+ : Binary(Binary), Address(Address) {
+ Index = Binary->getIndexForAddr(Address);
+ if (RoundToNext) {
+ // We might get an address that is not code;
+ // round it to the next valid code address.
+ if (Index >= Binary->getCodeOffsetsSize())
+ this->Address = UINT64_MAX;
+ else
+ this->Address = Binary->getAddressforIndex(Index);
+ }
+}
+
+bool InstructionPointer::advance() {
+ Index++;
+ if (Index >= Binary->getCodeOffsetsSize()) {
+ Address = UINT64_MAX;
+ return false;
+ }
+ Address = Binary->getAddressforIndex(Index);
+ return true;
+}
+
+bool InstructionPointer::backward() {
+ if (Index == 0) {
+ Address = 0;
+ return false;
+ }
+ Index--;
+ Address = Binary->getAddressforIndex(Index);
+ return true;
+}
+
+void InstructionPointer::update(uint64_t Addr) {
+ Address = Addr;
+ Index = Binary->getIndexForAddr(Address);
+}
+
+} // end namespace sampleprof
+} // end namespace llvm
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.h b/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.h
new file mode 100644
index 0000000000..d3d1c6f1fd
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ProfiledBinary.h
@@ -0,0 +1,541 @@
+//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
+#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
+
+#include "CallContext.h"
+#include "ErrorHandling.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+extern cl::opt<bool> EnableCSPreInliner;
+extern cl::opt<bool> UseContextCostForPreInliner;
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::object;
+
+namespace llvm {
+namespace sampleprof {
+
+class ProfiledBinary;
+
+struct InstructionPointer {
+ const ProfiledBinary *Binary;
+ union {
+ // Offset of the executable segment of the binary.
+ uint64_t Offset = 0;
+ // Also used as address in unwinder
+ uint64_t Address;
+ };
+ // Index to the sorted code address array of the binary.
+ uint64_t Index = 0;
+ InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
+ bool RoundToNext = false);
+ bool advance();
+ bool backward();
+ void update(uint64_t Addr);
+};
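+// A minimal usage sketch (visit() is a hypothetical callback) that walks all
+// code addresses in [Begin, End]:
+// InstructionPointer IP(Binary, Begin, /*RoundToNext=*/true);
+// if (IP.Address <= End)
+// do { visit(IP.Address); } while (IP.advance() && IP.Address <= End);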
+
+// The special frame addresses.
+enum SpecialFrameAddr {
+ // Dummy root of frame trie.
+ DummyRoot = 0,
+ // Represent all the addresses outside of current binary.
+ // This's also used to indicate the call stack should be truncated since this
+ // isn't a real call context the compiler will see.
+ ExternalAddr = 1,
+};
+
+using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
+
+struct BinaryFunction {
+ StringRef FuncName;
+ // End of range is an exclusive bound.
+ RangesTy Ranges;
+
+ uint64_t getFuncSize() {
+ uint64_t Sum = 0;
+ for (auto &R : Ranges) {
+ Sum += R.second - R.first;
+ }
+ return Sum;
+ }
+};
+
+// Info about a function range. A function can be split into multiple
+// non-contiguous ranges; each range corresponds to one FuncRange.
+struct FuncRange {
+ uint64_t StartOffset;
+ // EndOffset is an exclusive bound.
+ uint64_t EndOffset;
+ // Function the range belongs to
+ BinaryFunction *Func;
+ // Whether the start offset is the real entry of the function.
+ bool IsFuncEntry = false;
+
+ StringRef getFuncName() { return Func->FuncName; }
+};
+
+// Prolog/epilog offset tracker, used to filter out broken stack samples.
+// Currently we use a heuristic size (two) to infer the prolog and epilog
+// based on the start address and return address. In the future, we will
+// switch to a DWARF CFI-based tracker.
+struct PrologEpilogTracker {
+ // A set of prolog and epilog offsets. Used by virtual unwinding.
+ std::unordered_set<uint64_t> PrologEpilogSet;
+ ProfiledBinary *Binary;
+ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
+
+ // Take the two addresses from the start of function as prolog
+ void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) {
+ for (auto I : FuncStartOffsetMap) {
+ PrologEpilogSet.insert(I.first);
+ InstructionPointer IP(Binary, I.first);
+ if (!IP.advance())
+ break;
+ PrologEpilogSet.insert(IP.Offset);
+ }
+ }
+
+ // Take the last two addresses before the return address as epilog
+ void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
+ for (auto Addr : RetAddrs) {
+ PrologEpilogSet.insert(Addr);
+ InstructionPointer IP(Binary, Addr);
+ if (!IP.backward())
+ break;
+ PrologEpilogSet.insert(IP.Offset);
+ }
+ }
+};
+
+// Track function byte size under different contexts (the outlined version as
+// well as various inlined versions). It also provides query support to get a
+// function's size with the best matching context, which helps the pre-inliner
+// use accurate post-optimization sizes to make decisions.
+// TODO: If an inlinee is completely optimized away, ideally we should have a
+// zero size for its context; currently we would miss such contexts since they
+// don't have instructions. To fix this, we need to mark all inlinees with an
+// entry probe but without instructions as having zero size.
+class BinarySizeContextTracker {
+public:
+ // Add instruction with given size to a context
+ void addInstructionForContext(const SampleContextFrameVector &Context,
+ uint32_t InstrSize);
+
+ // Get function size with a specific context. When there's no exact match
+ // for the given context, try to retrieve the size of that function from
+ // closest matching context.
+ uint32_t getFuncSizeForContext(const SampleContext &Context);
+
+ // For inlinees that are fully optimized away, we can establish a zero size
+ // using their remaining probes.
+ void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
+
+ void dump() { RootContext.dumpTree(); }
+
+private:
+ using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
+ void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
+ MCDecodedPseudoProbeInlineTree &ProbeNode,
+ ProbeFrameStack &Context);
+
+ // Root node of the context trie. Note that this is a reverse context trie,
+ // with the callee as parent and the caller as child. This way we can
+ // traverse from the root to find the best/longest matching context if an
+ // exact match does not exist. It gives us the best possible estimate of a
+ // function's post-inline, post-optimization byte size.
+ ContextTrieNode RootContext;
+};
+
+using OffsetRange = std::pair<uint64_t, uint64_t>;
+
+class ProfiledBinary {
+ // Absolute path of the executable binary.
+ std::string Path;
+ // Path of the debug info binary.
+ std::string DebugBinaryPath;
+  // Path used by the symbolizer; it should point to the binary with debug
+  // info.
+ StringRef SymbolizerPath;
+ // The target triple.
+ Triple TheTriple;
+ // The runtime base address that the first executable segment is loaded at.
+ uint64_t BaseAddress = 0;
+  // The runtime base address that the first loadable segment is loaded at.
+ uint64_t FirstLoadableAddress = 0;
+ // The preferred load address of each executable segment.
+ std::vector<uint64_t> PreferredTextSegmentAddresses;
+ // The file offset of each executable segment.
+ std::vector<uint64_t> TextSegmentOffsets;
+
+  // The MC components used for disassembly.
+ std::unique_ptr<const MCRegisterInfo> MRI;
+ std::unique_ptr<const MCAsmInfo> AsmInfo;
+ std::unique_ptr<const MCSubtargetInfo> STI;
+ std::unique_ptr<const MCInstrInfo> MII;
+ std::unique_ptr<MCDisassembler> DisAsm;
+ std::unique_ptr<const MCInstrAnalysis> MIA;
+ std::unique_ptr<MCInstPrinter> IPrinter;
+ // A list of text sections sorted by start RVA and size. Used to check
+ // if a given RVA is a valid code address.
+ std::set<std::pair<uint64_t, uint64_t>> TextSections;
+
+  // A map from function name to BinaryFunction info.
+ std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
+
+  // An ordered map from a function's start offset to its function range
+  // info. Currently, to determine whether an ELF offset is the start of a
+  // real function, we leverage the function range info from DWARF.
+ std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap;
+
+ // Offset to context location map. Used to expand the context.
+ std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
+
+ // Offset to instruction size map. Also used for quick offset lookup.
+ std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
+
+  // An array of offsets of all instructions sorted in increasing order. The
+  // sorting is needed to quickly advance to the next/previous instruction.
+ std::vector<uint64_t> CodeAddrOffsets;
+ // A set of call instruction offsets. Used by virtual unwinding.
+ std::unordered_set<uint64_t> CallOffsets;
+ // A set of return instruction offsets. Used by virtual unwinding.
+ std::unordered_set<uint64_t> RetOffsets;
+ // A set of branch instruction offsets.
+ std::unordered_set<uint64_t> BranchOffsets;
+
+ // Estimate and track function prolog and epilog ranges.
+ PrologEpilogTracker ProEpilogTracker;
+
+  // Track function sizes under different contexts.
+ BinarySizeContextTracker FuncSizeTracker;
+
+ // The symbolizer used to get inline context for an instruction.
+ std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
+
+ // String table owning function name strings created from the symbolizer.
+ std::unordered_set<std::string> NameStrings;
+
+ // A collection of functions to print disassembly for.
+ StringSet<> DisassembleFunctionSet;
+
+ // Pseudo probe decoder
+ MCPseudoProbeDecoder ProbeDecoder;
+
+ bool UsePseudoProbes = false;
+
+ bool UseFSDiscriminator = false;
+
+ // Whether we need to symbolize all instructions to get function context size.
+ bool TrackFuncContextSize = false;
+
+  // Indicates whether the base load address was parsed from the mmap event
+  // or falls back to the preferred address.
+ bool IsLoadedByMMap = false;
+  // Used to avoid redundant warnings.
+ bool MissingMMapWarned = false;
+
+ void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
+
+ template <class ELFT>
+ void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);
+
+ void decodePseudoProbe(const ELFObjectFileBase *Obj);
+
+ void
+ checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
+ std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
+
+ // Set up disassembler and related components.
+ void setUpDisassembler(const ELFObjectFileBase *Obj);
+ void setupSymbolizer();
+
+ // Load debug info of subprograms from DWARF section.
+ void loadSymbolsFromDWARF(ObjectFile &Obj);
+
+  // A function may be split into multiple non-contiguous address ranges. We
+  // use this to mark whether a range's start offset is the real entry of the
+  // function, and to mark non-function labels as false.
+ void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName);
+
+ // Warn if no entry range exists in the function.
+ void warnNoFuncEntry();
+
+  /// Disassemble the text section and build various address maps.
+ void disassemble(const ELFObjectFileBase *O);
+
+  /// Helper function to disassemble a symbol and extract info for unwinding.
+ bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
+ SectionSymbolsTy &Symbols, const SectionRef &Section);
+ /// Symbolize a given instruction pointer and return a full call context.
+ SampleContextFrameVector symbolize(const InstructionPointer &IP,
+ bool UseCanonicalFnName = false,
+ bool UseProbeDiscriminator = false);
+ /// Decode the interesting parts of the binary and build internal data
+ /// structures. On high level, the parts of interest are:
+ /// 1. Text sections, including the main code section and the PLT
+ /// entries that will be used to handle cross-module call transitions.
+ /// 2. The .debug_line section, used by Dwarf-based profile generation.
+ /// 3. Pseudo probe related sections, used by probe-based profile
+ /// generation.
+ void load();
+
+public:
+ ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
+ : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
+ TrackFuncContextSize(EnableCSPreInliner &&
+ UseContextCostForPreInliner) {
+    // Point the symbolizer to the executable binary if a debug info binary
+    // is not specified.
+ SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
+ setupSymbolizer();
+ load();
+ }
+ uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
+ return VirtualAddress - BaseAddress;
+ }
+ uint64_t offsetToVirtualAddr(uint64_t Offset) const {
+ return Offset + BaseAddress;
+ }
+ StringRef getPath() const { return Path; }
+ StringRef getName() const { return llvm::sys::path::filename(Path); }
+ uint64_t getBaseAddress() const { return BaseAddress; }
+ void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
+
+ // Return the preferred load address for the first executable segment.
+ uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; }
+ // Return the preferred load address for the first loadable segment.
+ uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
+ // Return the file offset for the first executable segment.
+ uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
+ const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
+ return PreferredTextSegmentAddresses;
+ }
+ const std::vector<uint64_t> &getTextSegmentOffsets() const {
+ return TextSegmentOffsets;
+ }
+
+ uint64_t getInstSize(uint64_t Offset) const {
+ auto I = Offset2InstSizeMap.find(Offset);
+ if (I == Offset2InstSizeMap.end())
+ return 0;
+ return I->second;
+ }
+
+ bool offsetIsCode(uint64_t Offset) const {
+ return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
+ }
+ bool addressIsCode(uint64_t Address) const {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ return offsetIsCode(Offset);
+ }
+ bool addressIsCall(uint64_t Address) const {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ return CallOffsets.count(Offset);
+ }
+ bool addressIsReturn(uint64_t Address) const {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ return RetOffsets.count(Offset);
+ }
+ bool addressInPrologEpilog(uint64_t Address) const {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ return ProEpilogTracker.PrologEpilogSet.count(Offset);
+ }
+
+ bool offsetIsTransfer(uint64_t Offset) {
+ return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
+ CallOffsets.count(Offset);
+ }
+
+ uint64_t getAddressforIndex(uint64_t Index) const {
+ return offsetToVirtualAddr(CodeAddrOffsets[Index]);
+ }
+
+ size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }
+
+ bool usePseudoProbes() const { return UsePseudoProbes; }
+ bool useFSDiscriminator() const { return UseFSDiscriminator; }
+  // Get the index in CodeAddrOffsets for the given offset. Since the input
+  // may not be a valid code address, it is rounded up to the next valid code
+  // address via a lower-bound lookup.
+ uint32_t getIndexForOffset(uint64_t Offset) const {
+ auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
+ return Low - CodeAddrOffsets.begin();
+ }
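+  // Editor's note (worked example): with CodeAddrOffsets = {0x10, 0x14,
+  // 0x18}, getIndexForOffset(0x15) returns 2 (rounding up to 0x18), while
+  // getIndexForOffset(0x14) returns 1 (an exact match).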
+ uint32_t getIndexForAddr(uint64_t Address) const {
+ uint64_t Offset = virtualAddrToOffset(Address);
+ return getIndexForOffset(Offset);
+ }
+
+ uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
+ if (FrameAddr == ExternalAddr)
+ return ExternalAddr;
+ auto I = getIndexForAddr(FrameAddr);
+ FrameAddr = I ? getAddressforIndex(I - 1) : 0;
+ if (FrameAddr && addressIsCall(FrameAddr))
+ return FrameAddr;
+ return 0;
+ }
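+  // Editor's note: a non-leaf frame address is a return address, i.e. the
+  // instruction right after the call. getCallAddrFromFrameAddr() therefore
+  // steps back one entry in CodeAddrOffsets to recover the call site, and
+  // returns 0 when the preceding instruction turns out not to be a call.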
+
+ FuncRange *findFuncRangeForStartOffset(uint64_t Offset) {
+ auto I = StartOffset2FuncRangeMap.find(Offset);
+ if (I == StartOffset2FuncRangeMap.end())
+ return nullptr;
+ return &I->second;
+ }
+
+  // Binary search for the function range that includes the input offset.
+ FuncRange *findFuncRangeForOffset(uint64_t Offset) {
+ auto I = StartOffset2FuncRangeMap.upper_bound(Offset);
+ if (I == StartOffset2FuncRangeMap.begin())
+ return nullptr;
+ I--;
+
+ if (Offset >= I->second.EndOffset)
+ return nullptr;
+
+ return &I->second;
+ }
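+  // Editor's note (worked example, offsets hypothetical): given ranges
+  // [0x100, 0x180) and [0x200, 0x280), findFuncRangeForOffset(0x150)
+  // upper_bound()s to the 0x200 entry, steps back to 0x100 and returns that
+  // range; findFuncRangeForOffset(0x190) also steps back to 0x100 but fails
+  // the EndOffset check (0x190 >= 0x180) and returns nullptr.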
+
+ // Get all ranges of one function.
+ RangesTy getRangesForOffset(uint64_t Offset) {
+ auto *FRange = findFuncRangeForOffset(Offset);
+    // Ignore ranges that fall into the PLT section or a system library.
+ if (!FRange)
+ return RangesTy();
+
+ return FRange->Func->Ranges;
+ }
+
+ const std::unordered_map<std::string, BinaryFunction> &
+ getAllBinaryFunctions() {
+ return BinaryFunctions;
+ }
+
+ BinaryFunction *getBinaryFunction(StringRef FName) {
+ auto I = BinaryFunctions.find(FName.str());
+ if (I == BinaryFunctions.end())
+ return nullptr;
+ return &I->second;
+ }
+
+ uint32_t getFuncSizeForContext(SampleContext &Context) {
+ return FuncSizeTracker.getFuncSizeForContext(Context);
+ }
+
+  // Load the symbols from the debug info and populate them into the symbol
+  // list.
+ void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);
+
+ const SampleContextFrameVector &
+ getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
+ auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
+ if (I.second) {
+ InstructionPointer IP(this, Offset);
+ I.first->second = symbolize(IP, true, UseProbeDiscriminator);
+ }
+ return I.first->second;
+ }
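+  // Editor's note: emplace() above probes and inserts in one step, so the
+  // symbolizer is invoked only the first time an offset is queried; later
+  // calls return the cached SampleContextFrameVector.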
+
+ Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
+ const auto &Stack = getFrameLocationStack(Offset);
+ if (Stack.empty())
+ return {};
+ return Stack.back();
+ }
+
+ // Compare two addresses' inline context
+ bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
+
+  // Get the full context of the current stack with inline context filled in.
+  // It searches the disassembly info stored in Offset2LocStackMap. The result
+  // is used as the key of the function sample map.
+ SampleContextFrameVector
+ getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
+ bool &WasLeafInlined);
+  // Go through the instructions in the given range and record their sizes
+  // for the inline context.
+ void computeInlinedContextSizeForRange(uint64_t StartOffset,
+ uint64_t EndOffset);
+
+ const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
+ return ProbeDecoder.getCallProbeForAddr(Address);
+ }
+
+ void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
+ SampleContextFrameVector &InlineContextStack,
+ bool IncludeLeaf = false) const {
+ SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
+ ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
+ IncludeLeaf);
+ for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
+ auto &Callsite = ProbeInlineContext[I];
+ // Clear the current context for an unknown probe.
+ if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
+ InlineContextStack.clear();
+ continue;
+ }
+ InlineContextStack.emplace_back(Callsite.first,
+ LineLocation(Callsite.second, 0));
+ }
+ }
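+  // Editor's note: in the loop above, a zero callsite location on any frame
+  // except the last marks an unknown probe, so the partially built context
+  // is discarded and rebuilt from the frames that follow.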
+ const AddressProbesMap &getAddress2ProbesMap() const {
+ return ProbeDecoder.getAddress2ProbesMap();
+ }
+ const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
+ return ProbeDecoder.getFuncDescForGUID(GUID);
+ }
+
+ const MCPseudoProbeFuncDesc *
+ getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
+ return ProbeDecoder.getInlinerDescForProbe(Probe);
+ }
+
+ bool getTrackFuncContextSize() { return TrackFuncContextSize; }
+
+ bool getIsLoadedByMMap() { return IsLoadedByMMap; }
+
+ void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
+
+ bool getMissingMMapWarned() { return MissingMMapWarned; }
+
+ void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
+};
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/llvm-profgen.cpp b/contrib/libs/llvm14/tools/llvm-profgen/llvm-profgen.cpp
new file mode 100644
index 0000000000..f092df04d5
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/llvm-profgen.cpp
@@ -0,0 +1,164 @@
+//===- llvm-profgen.cpp - LLVM SPGO profile generation tool -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-profgen generates SPGO profiles from perf script output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ErrorHandling.h"
+#include "PerfReader.h"
+#include "ProfileGenerator.h"
+#include "ProfiledBinary.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/TargetSelect.h"
+
+static cl::OptionCategory ProfGenCategory("ProfGen Options");
+
+static cl::opt<std::string> PerfScriptFilename(
+ "perfscript", cl::value_desc("perfscript"), cl::ZeroOrMore,
+ llvm::cl::MiscFlags::CommaSeparated,
+ cl::desc("Path of perf-script trace created by Linux perf tool with "
+ "`script` command(the raw perf.data should be profiled with -b)"),
+ cl::cat(ProfGenCategory));
+static cl::alias PSA("ps", cl::desc("Alias for --perfscript"),
+ cl::aliasopt(PerfScriptFilename));
+
+static cl::opt<std::string> PerfDataFilename(
+ "perfdata", cl::value_desc("perfdata"), cl::ZeroOrMore,
+ llvm::cl::MiscFlags::CommaSeparated,
+ cl::desc("Path of raw perf data created by Linux perf tool (it should be "
+ "profiled with -b)"),
+ cl::cat(ProfGenCategory));
+static cl::alias PDA("pd", cl::desc("Alias for --perfdata"),
+ cl::aliasopt(PerfDataFilename));
+
+static cl::opt<std::string> UnsymbolizedProfFilename(
+ "unsymbolized-profile", cl::value_desc("unsymbolized profile"),
+ cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated,
+ cl::desc("Path of the unsymbolized profile created by "
+ "`llvm-profgen` with `--skip-symbolization`"),
+ cl::cat(ProfGenCategory));
+static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
+ cl::aliasopt(UnsymbolizedProfFilename));
+
+static cl::opt<std::string>
+ BinaryPath("binary", cl::value_desc("binary"), cl::Required,
+ cl::desc("Path of profiled executable binary."),
+ cl::cat(ProfGenCategory));
+
+static cl::opt<std::string> DebugBinPath(
+ "debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore,
+ cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info "
+ "from it instead of the executable binary."),
+ cl::cat(ProfGenCategory));
+
+extern cl::opt<bool> ShowDisassemblyOnly;
+extern cl::opt<bool> ShowSourceLocations;
+extern cl::opt<bool> SkipSymbolization;
+
+using namespace llvm;
+using namespace sampleprof;
+
+// Validate the command line input.
+static void validateCommandLine() {
+  // Allow a missing perf script if we are only showing binary disassembly.
+ if (!ShowDisassemblyOnly) {
+    // Validate that exactly one input profile is provided.
+ uint16_t HasPerfData = PerfDataFilename.getNumOccurrences();
+ uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences();
+ uint16_t HasUnsymbolizedProfile =
+ UnsymbolizedProfFilename.getNumOccurrences();
+ uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile;
+ if (S != 1) {
+ std::string Msg =
+ S > 1
+ ? "`--perfscript`, `--perfdata` and `--unsymbolized-profile` "
+ "cannot be used together."
+ : "Perf input file is missing, please use one of `--perfscript`, "
+ "`--perfdata` and `--unsymbolized-profile` for the input.";
+ exitWithError(Msg);
+ }
+
+ auto CheckFileExists = [](bool H, StringRef File) {
+ if (H && !llvm::sys::fs::exists(File)) {
+ std::string Msg = "Input perf file(" + File.str() + ") doesn't exist.";
+ exitWithError(Msg);
+ }
+ };
+
+ CheckFileExists(HasPerfData, PerfDataFilename);
+ CheckFileExists(HasPerfScript, PerfScriptFilename);
+ CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename);
+ }
+
+ if (!llvm::sys::fs::exists(BinaryPath)) {
+ std::string Msg = "Input binary(" + BinaryPath + ") doesn't exist.";
+ exitWithError(Msg);
+ }
+
+ if (CSProfileGenerator::MaxCompressionSize < -1) {
+ exitWithError("Value of --compress-recursion should >= -1");
+ }
+ if (ShowSourceLocations && !ShowDisassemblyOnly) {
+ exitWithError("--show-source-locations should work together with "
+ "--show-disassembly-only!");
+ }
+}
+
+static PerfInputFile getPerfInputFile() {
+ PerfInputFile File;
+ if (PerfDataFilename.getNumOccurrences()) {
+ File.InputFile = PerfDataFilename;
+ File.Format = PerfFormat::PerfData;
+ } else if (PerfScriptFilename.getNumOccurrences()) {
+ File.InputFile = PerfScriptFilename;
+ File.Format = PerfFormat::PerfScript;
+ } else if (UnsymbolizedProfFilename.getNumOccurrences()) {
+ File.InputFile = UnsymbolizedProfFilename;
+ File.Format = PerfFormat::UnsymbolizedProfile;
+ }
+ return File;
+}
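+// Editor's note: validateCommandLine() has already ensured that exactly one
+// of the three inputs is present, so the if/else chain above simply selects
+// whichever file was given.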
+
+int main(int argc, const char *argv[]) {
+ InitLLVM X(argc, argv);
+
+ // Initialize targets and assembly printers/parsers.
+ InitializeAllTargetInfos();
+ InitializeAllTargetMCs();
+ InitializeAllDisassemblers();
+
+ cl::HideUnrelatedOptions({&ProfGenCategory, &getColorCategory()});
+ cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
+ validateCommandLine();
+
+ // Load symbols and disassemble the code of a given binary.
+ std::unique_ptr<ProfiledBinary> Binary =
+ std::make_unique<ProfiledBinary>(BinaryPath, DebugBinPath);
+ if (ShowDisassemblyOnly)
+ return EXIT_SUCCESS;
+
+ PerfInputFile PerfFile = getPerfInputFile();
+ std::unique_ptr<PerfReaderBase> Reader =
+ PerfReaderBase::create(Binary.get(), PerfFile);
+ // Parse perf events and samples
+ Reader->parsePerfTraces();
+
+ if (SkipSymbolization)
+ return EXIT_SUCCESS;
+
+ std::unique_ptr<ProfileGeneratorBase> Generator =
+ ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
+ Reader->profileIsCSFlat());
+ Generator->generateProfile();
+ Generator->write();
+
+ return EXIT_SUCCESS;
+}
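+// Editor's usage sketch (file names hypothetical; --output is defined in
+// ProfileGenerator.cpp, and the exact `perf script` fields may vary):
+//   perf record -b -- ./a.out
+//   perf script -i perf.data > perf.script
+//   llvm-profgen --perfscript=perf.script --binary=./a.out --output=a.prof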
diff --git a/contrib/libs/llvm14/tools/llvm-profgen/ya.make b/contrib/libs/llvm14/tools/llvm-profgen/ya.make
new file mode 100644
index 0000000000..c6c45c6e36
--- /dev/null
+++ b/contrib/libs/llvm14/tools/llvm-profgen/ya.make
@@ -0,0 +1,80 @@
+# Generated by devtools/yamaker.
+
+PROGRAM()
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+PEERDIR(
+ contrib/libs/llvm14
+ contrib/libs/llvm14/include
+ contrib/libs/llvm14/lib/Analysis
+ contrib/libs/llvm14/lib/AsmParser
+ contrib/libs/llvm14/lib/BinaryFormat
+ contrib/libs/llvm14/lib/Bitcode/Reader
+ contrib/libs/llvm14/lib/Bitcode/Writer
+ contrib/libs/llvm14/lib/Bitstream/Reader
+ contrib/libs/llvm14/lib/DebugInfo/CodeView
+ contrib/libs/llvm14/lib/DebugInfo/DWARF
+ contrib/libs/llvm14/lib/DebugInfo/MSF
+ contrib/libs/llvm14/lib/DebugInfo/PDB
+ contrib/libs/llvm14/lib/DebugInfo/Symbolize
+ contrib/libs/llvm14/lib/Demangle
+ contrib/libs/llvm14/lib/Frontend/OpenMP
+ contrib/libs/llvm14/lib/IR
+ contrib/libs/llvm14/lib/IRReader
+ contrib/libs/llvm14/lib/Linker
+ contrib/libs/llvm14/lib/MC
+ contrib/libs/llvm14/lib/MC/MCDisassembler
+ contrib/libs/llvm14/lib/MC/MCParser
+ contrib/libs/llvm14/lib/Object
+ contrib/libs/llvm14/lib/ProfileData
+ contrib/libs/llvm14/lib/Remarks
+ contrib/libs/llvm14/lib/Support
+ contrib/libs/llvm14/lib/Target/AArch64/Disassembler
+ contrib/libs/llvm14/lib/Target/AArch64/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm14/lib/Target/AArch64/Utils
+ contrib/libs/llvm14/lib/Target/ARM/Disassembler
+ contrib/libs/llvm14/lib/Target/ARM/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm14/lib/Target/ARM/Utils
+ contrib/libs/llvm14/lib/Target/BPF/Disassembler
+ contrib/libs/llvm14/lib/Target/BPF/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/BPF/TargetInfo
+ contrib/libs/llvm14/lib/Target/NVPTX/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/NVPTX/TargetInfo
+ contrib/libs/llvm14/lib/Target/PowerPC/Disassembler
+ contrib/libs/llvm14/lib/Target/PowerPC/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/PowerPC/TargetInfo
+ contrib/libs/llvm14/lib/Target/X86/Disassembler
+ contrib/libs/llvm14/lib/Target/X86/MCTargetDesc
+ contrib/libs/llvm14/lib/Target/X86/TargetInfo
+ contrib/libs/llvm14/lib/TextAPI
+ contrib/libs/llvm14/lib/Transforms/AggressiveInstCombine
+ contrib/libs/llvm14/lib/Transforms/IPO
+ contrib/libs/llvm14/lib/Transforms/InstCombine
+ contrib/libs/llvm14/lib/Transforms/Instrumentation
+ contrib/libs/llvm14/lib/Transforms/Scalar
+ contrib/libs/llvm14/lib/Transforms/Utils
+ contrib/libs/llvm14/lib/Transforms/Vectorize
+)
+
+ADDINCL(
+ contrib/libs/llvm14/tools/llvm-profgen
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+ CSPreInliner.cpp
+ PerfReader.cpp
+ ProfileGenerator.cpp
+ ProfiledBinary.cpp
+ llvm-profgen.cpp
+)
+
+END()