diff options
author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/include/llvm/ProfileData | |
parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz |
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/include/llvm/ProfileData')
15 files changed, 7779 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMapping.h new file mode 100644 index 0000000000..d5f8c4c6cc --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -0,0 +1,1077 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- CoverageMapping.h - Code coverage mapping support --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Code coverage mapping data is generated by clang and read by +// llvm-cov to show code coverage statistics for a file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H +#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <system_error> +#include <tuple> +#include <utility> +#include <vector> + +namespace llvm { + +class IndexedInstrProfReader; + +namespace coverage { + +class CoverageMappingReader; +struct CoverageMappingRecord; + +enum class 
coveragemap_error { + success = 0, + eof, + no_data_found, + unsupported_version, + truncated, + malformed, + decompression_failed, + invalid_or_missing_arch_specifier +}; + +const std::error_category &coveragemap_category(); + +inline std::error_code make_error_code(coveragemap_error E) { + return std::error_code(static_cast<int>(E), coveragemap_category()); +} + +class CoverageMapError : public ErrorInfo<CoverageMapError> { +public: + CoverageMapError(coveragemap_error Err) : Err(Err) { + assert(Err != coveragemap_error::success && "Not an error"); + } + + std::string message() const override; + + void log(raw_ostream &OS) const override { OS << message(); } + + std::error_code convertToErrorCode() const override { + return make_error_code(Err); + } + + coveragemap_error get() const { return Err; } + + static char ID; + +private: + coveragemap_error Err; +}; + +/// A Counter is an abstract value that describes how to compute the +/// execution count for a region of code using the collected profile count data. +struct Counter { + /// The CounterExpression kind (Add or Subtract) is encoded in bit 0 next to + /// the CounterKind. This means CounterKind has to leave bit 0 free. 
+ enum CounterKind { Zero, CounterValueReference, Expression }; + static const unsigned EncodingTagBits = 2; + static const unsigned EncodingTagMask = 0x3; + static const unsigned EncodingCounterTagAndExpansionRegionTagBits = + EncodingTagBits + 1; + +private: + CounterKind Kind = Zero; + unsigned ID = 0; + + Counter(CounterKind Kind, unsigned ID) : Kind(Kind), ID(ID) {} + +public: + Counter() = default; + + CounterKind getKind() const { return Kind; } + + bool isZero() const { return Kind == Zero; } + + bool isExpression() const { return Kind == Expression; } + + unsigned getCounterID() const { return ID; } + + unsigned getExpressionID() const { return ID; } + + friend bool operator==(const Counter &LHS, const Counter &RHS) { + return LHS.Kind == RHS.Kind && LHS.ID == RHS.ID; + } + + friend bool operator!=(const Counter &LHS, const Counter &RHS) { + return !(LHS == RHS); + } + + friend bool operator<(const Counter &LHS, const Counter &RHS) { + return std::tie(LHS.Kind, LHS.ID) < std::tie(RHS.Kind, RHS.ID); + } + + /// Return the counter that represents the number zero. + static Counter getZero() { return Counter(); } + + /// Return the counter that corresponds to a specific profile counter. + static Counter getCounter(unsigned CounterId) { + return Counter(CounterValueReference, CounterId); + } + + /// Return the counter that corresponds to a specific addition counter + /// expression. + static Counter getExpression(unsigned ExpressionId) { + return Counter(Expression, ExpressionId); + } +}; + +/// A Counter expression is a value that represents an arithmetic operation +/// with two counters. +struct CounterExpression { + enum ExprKind { Subtract, Add }; + ExprKind Kind; + Counter LHS, RHS; + + CounterExpression(ExprKind Kind, Counter LHS, Counter RHS) + : Kind(Kind), LHS(LHS), RHS(RHS) {} +}; + +/// A Counter expression builder is used to construct the counter expressions. +/// It avoids unnecessary duplication and simplifies algebraic expressions. 
+class CounterExpressionBuilder { + /// A list of all the counter expressions + std::vector<CounterExpression> Expressions; + + /// A lookup table for the index of a given expression. + DenseMap<CounterExpression, unsigned> ExpressionIndices; + + /// Return the counter which corresponds to the given expression. + /// + /// If the given expression is already stored in the builder, a counter + /// that references that expression is returned. Otherwise, the given + /// expression is added to the builder's collection of expressions. + Counter get(const CounterExpression &E); + + /// Represents a term in a counter expression tree. + struct Term { + unsigned CounterID; + int Factor; + + Term(unsigned CounterID, int Factor) + : CounterID(CounterID), Factor(Factor) {} + }; + + /// Gather the terms of the expression tree for processing. + /// + /// This collects each addition and subtraction referenced by the counter into + /// a sequence that can be sorted and combined to build a simplified counter + /// expression. + void extractTerms(Counter C, int Sign, SmallVectorImpl<Term> &Terms); + + /// Simplifies the given expression tree + /// by getting rid of algebraically redundant operations. + Counter simplify(Counter ExpressionTree); + +public: + ArrayRef<CounterExpression> getExpressions() const { return Expressions; } + + /// Return a counter that represents the expression that adds LHS and RHS. + Counter add(Counter LHS, Counter RHS); + + /// Return a counter that represents the expression that subtracts RHS from + /// LHS. + Counter subtract(Counter LHS, Counter RHS); +}; + +using LineColPair = std::pair<unsigned, unsigned>; + +/// A Counter mapping region associates a source range with a specific counter. 
+struct CounterMappingRegion { + enum RegionKind { + /// A CodeRegion associates some code with a counter + CodeRegion, + + /// An ExpansionRegion represents a file expansion region that associates + /// a source range with the expansion of a virtual source file, such as + /// for a macro instantiation or #include file. + ExpansionRegion, + + /// A SkippedRegion represents a source range with code that was skipped + /// by a preprocessor or similar means. + SkippedRegion, + + /// A GapRegion is like a CodeRegion, but its count is only set as the + /// line execution count when its the only region in the line. + GapRegion, + + /// A BranchRegion represents leaf-level boolean expressions and is + /// associated with two counters, each representing the number of times the + /// expression evaluates to true or false. + BranchRegion + }; + + /// Primary Counter that is also used for Branch Regions (TrueCount). + Counter Count; + + /// Secondary Counter used for Branch Regions (FalseCount). + Counter FalseCount; + + unsigned FileID, ExpandedFileID; + unsigned LineStart, ColumnStart, LineEnd, ColumnEnd; + RegionKind Kind; + + CounterMappingRegion(Counter Count, unsigned FileID, unsigned ExpandedFileID, + unsigned LineStart, unsigned ColumnStart, + unsigned LineEnd, unsigned ColumnEnd, RegionKind Kind) + : Count(Count), FileID(FileID), ExpandedFileID(ExpandedFileID), + LineStart(LineStart), ColumnStart(ColumnStart), LineEnd(LineEnd), + ColumnEnd(ColumnEnd), Kind(Kind) {} + + CounterMappingRegion(Counter Count, Counter FalseCount, unsigned FileID, + unsigned ExpandedFileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, + unsigned ColumnEnd, RegionKind Kind) + : Count(Count), FalseCount(FalseCount), FileID(FileID), + ExpandedFileID(ExpandedFileID), LineStart(LineStart), + ColumnStart(ColumnStart), LineEnd(LineEnd), ColumnEnd(ColumnEnd), + Kind(Kind) {} + + static CounterMappingRegion + makeRegion(Counter Count, unsigned FileID, unsigned LineStart, + unsigned 
ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Count, FileID, 0, LineStart, ColumnStart, + LineEnd, ColumnEnd, CodeRegion); + } + + static CounterMappingRegion + makeExpansion(unsigned FileID, unsigned ExpandedFileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Counter(), FileID, ExpandedFileID, LineStart, + ColumnStart, LineEnd, ColumnEnd, + ExpansionRegion); + } + + static CounterMappingRegion + makeSkipped(unsigned FileID, unsigned LineStart, unsigned ColumnStart, + unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Counter(), FileID, 0, LineStart, ColumnStart, + LineEnd, ColumnEnd, SkippedRegion); + } + + static CounterMappingRegion + makeGapRegion(Counter Count, unsigned FileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Count, FileID, 0, LineStart, ColumnStart, + LineEnd, (1U << 31) | ColumnEnd, GapRegion); + } + + static CounterMappingRegion + makeBranchRegion(Counter Count, Counter FalseCount, unsigned FileID, + unsigned LineStart, unsigned ColumnStart, unsigned LineEnd, + unsigned ColumnEnd) { + return CounterMappingRegion(Count, FalseCount, FileID, 0, LineStart, + ColumnStart, LineEnd, ColumnEnd, BranchRegion); + } + + inline LineColPair startLoc() const { + return LineColPair(LineStart, ColumnStart); + } + + inline LineColPair endLoc() const { return LineColPair(LineEnd, ColumnEnd); } +}; + +/// Associates a source range with an execution count. 
+struct CountedRegion : public CounterMappingRegion { + uint64_t ExecutionCount; + uint64_t FalseExecutionCount; + bool Folded; + + CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount) + : CounterMappingRegion(R), ExecutionCount(ExecutionCount), + FalseExecutionCount(0), Folded(false) {} + + CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount, + uint64_t FalseExecutionCount) + : CounterMappingRegion(R), ExecutionCount(ExecutionCount), + FalseExecutionCount(FalseExecutionCount), Folded(false) {} +}; + +/// A Counter mapping context is used to connect the counters, expressions +/// and the obtained counter values. +class CounterMappingContext { + ArrayRef<CounterExpression> Expressions; + ArrayRef<uint64_t> CounterValues; + +public: + CounterMappingContext(ArrayRef<CounterExpression> Expressions, + ArrayRef<uint64_t> CounterValues = None) + : Expressions(Expressions), CounterValues(CounterValues) {} + + void setCounts(ArrayRef<uint64_t> Counts) { CounterValues = Counts; } + + void dump(const Counter &C, raw_ostream &OS) const; + void dump(const Counter &C) const { dump(C, dbgs()); } + + /// Return the number of times that a region of code associated with this + /// counter was executed. + Expected<int64_t> evaluate(const Counter &C) const; + + unsigned getMaxCounterID(const Counter &C) const; +}; + +/// Code coverage information for a single function. +struct FunctionRecord { + /// Raw function name. + std::string Name; + /// Mapping from FileID (i.e. vector index) to filename. Used to support + /// macro expansions within a function in which the macro and function are + /// defined in separate files. + /// + /// TODO: Uniquing filenames across all function records may be a performance + /// optimization. + std::vector<std::string> Filenames; + /// Regions in the function along with their counts. + std::vector<CountedRegion> CountedRegions; + /// Branch Regions in the function along with their counts. 
+ std::vector<CountedRegion> CountedBranchRegions; + /// The number of times this function was executed. + uint64_t ExecutionCount = 0; + + FunctionRecord(StringRef Name, ArrayRef<StringRef> Filenames) + : Name(Name), Filenames(Filenames.begin(), Filenames.end()) {} + + FunctionRecord(FunctionRecord &&FR) = default; + FunctionRecord &operator=(FunctionRecord &&) = default; + + void pushRegion(CounterMappingRegion Region, uint64_t Count, + uint64_t FalseCount) { + if (Region.Kind == CounterMappingRegion::BranchRegion) { + CountedBranchRegions.emplace_back(Region, Count, FalseCount); + // If both counters are hard-coded to zero, then this region represents a + // constant-folded branch. + if (Region.Count.isZero() && Region.FalseCount.isZero()) + CountedBranchRegions.back().Folded = true; + return; + } + if (CountedRegions.empty()) + ExecutionCount = Count; + CountedRegions.emplace_back(Region, Count, FalseCount); + } +}; + +/// Iterator over Functions, optionally filtered to a single file. +class FunctionRecordIterator + : public iterator_facade_base<FunctionRecordIterator, + std::forward_iterator_tag, FunctionRecord> { + ArrayRef<FunctionRecord> Records; + ArrayRef<FunctionRecord>::iterator Current; + StringRef Filename; + + /// Skip records whose primary file is not \c Filename. 
+ void skipOtherFiles(); + +public: + FunctionRecordIterator(ArrayRef<FunctionRecord> Records_, + StringRef Filename = "") + : Records(Records_), Current(Records.begin()), Filename(Filename) { + skipOtherFiles(); + } + + FunctionRecordIterator() : Current(Records.begin()) {} + + bool operator==(const FunctionRecordIterator &RHS) const { + return Current == RHS.Current && Filename == RHS.Filename; + } + + const FunctionRecord &operator*() const { return *Current; } + + FunctionRecordIterator &operator++() { + assert(Current != Records.end() && "incremented past end"); + ++Current; + skipOtherFiles(); + return *this; + } +}; + +/// Coverage information for a macro expansion or #included file. +/// +/// When covered code has pieces that can be expanded for more detail, such as a +/// preprocessor macro use and its definition, these are represented as +/// expansions whose coverage can be looked up independently. +struct ExpansionRecord { + /// The abstract file this expansion covers. + unsigned FileID; + /// The region that expands to this record. + const CountedRegion &Region; + /// Coverage for the expansion. + const FunctionRecord &Function; + + ExpansionRecord(const CountedRegion &Region, + const FunctionRecord &Function) + : FileID(Region.ExpandedFileID), Region(Region), Function(Function) {} +}; + +/// The execution count information starting at a point in a file. +/// +/// A sequence of CoverageSegments gives execution counts for a file in format +/// that's simple to iterate through for processing. +struct CoverageSegment { + /// The line where this segment begins. + unsigned Line; + /// The column where this segment begins. + unsigned Col; + /// The execution count, or zero if no count was recorded. + uint64_t Count; + /// When false, the segment was uninstrumented or skipped. + bool HasCount; + /// Whether this enters a new region or returns to a previous count. + bool IsRegionEntry; + /// Whether this enters a gap region. 
+ bool IsGapRegion; + + CoverageSegment(unsigned Line, unsigned Col, bool IsRegionEntry) + : Line(Line), Col(Col), Count(0), HasCount(false), + IsRegionEntry(IsRegionEntry), IsGapRegion(false) {} + + CoverageSegment(unsigned Line, unsigned Col, uint64_t Count, + bool IsRegionEntry, bool IsGapRegion = false, + bool IsBranchRegion = false) + : Line(Line), Col(Col), Count(Count), HasCount(true), + IsRegionEntry(IsRegionEntry), IsGapRegion(IsGapRegion) {} + + friend bool operator==(const CoverageSegment &L, const CoverageSegment &R) { + return std::tie(L.Line, L.Col, L.Count, L.HasCount, L.IsRegionEntry, + L.IsGapRegion) == std::tie(R.Line, R.Col, R.Count, + R.HasCount, R.IsRegionEntry, + R.IsGapRegion); + } +}; + +/// An instantiation group contains a \c FunctionRecord list, such that each +/// record corresponds to a distinct instantiation of the same function. +/// +/// Note that it's possible for a function to have more than one instantiation +/// (consider C++ template specializations or static inline functions). +class InstantiationGroup { + friend class CoverageMapping; + + unsigned Line; + unsigned Col; + std::vector<const FunctionRecord *> Instantiations; + + InstantiationGroup(unsigned Line, unsigned Col, + std::vector<const FunctionRecord *> Instantiations) + : Line(Line), Col(Col), Instantiations(std::move(Instantiations)) {} + +public: + InstantiationGroup(const InstantiationGroup &) = delete; + InstantiationGroup(InstantiationGroup &&) = default; + + /// Get the number of instantiations in this group. + size_t size() const { return Instantiations.size(); } + + /// Get the line where the common function was defined. + unsigned getLine() const { return Line; } + + /// Get the column where the common function was defined. + unsigned getColumn() const { return Col; } + + /// Check if the instantiations in this group have a common mangled name. 
+ bool hasName() const { + for (unsigned I = 1, E = Instantiations.size(); I < E; ++I) + if (Instantiations[I]->Name != Instantiations[0]->Name) + return false; + return true; + } + + /// Get the common mangled name for instantiations in this group. + StringRef getName() const { + assert(hasName() && "Instantiations don't have a shared name"); + return Instantiations[0]->Name; + } + + /// Get the total execution count of all instantiations in this group. + uint64_t getTotalExecutionCount() const { + uint64_t Count = 0; + for (const FunctionRecord *F : Instantiations) + Count += F->ExecutionCount; + return Count; + } + + /// Get the instantiations in this group. + ArrayRef<const FunctionRecord *> getInstantiations() const { + return Instantiations; + } +}; + +/// Coverage information to be processed or displayed. +/// +/// This represents the coverage of an entire file, expansion, or function. It +/// provides a sequence of CoverageSegments to iterate through, as well as the +/// list of expansions that can be further processed. +class CoverageData { + friend class CoverageMapping; + + std::string Filename; + std::vector<CoverageSegment> Segments; + std::vector<ExpansionRecord> Expansions; + std::vector<CountedRegion> BranchRegions; + +public: + CoverageData() = default; + + CoverageData(StringRef Filename) : Filename(Filename) {} + + /// Get the name of the file this data covers. + StringRef getFilename() const { return Filename; } + + /// Get an iterator over the coverage segments for this object. The segments + /// are guaranteed to be uniqued and sorted by location. + std::vector<CoverageSegment>::const_iterator begin() const { + return Segments.begin(); + } + + std::vector<CoverageSegment>::const_iterator end() const { + return Segments.end(); + } + + bool empty() const { return Segments.empty(); } + + /// Expansions that can be further processed. 
+ ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; } + + /// Branches that can be further processed. + ArrayRef<CountedRegion> getBranches() const { return BranchRegions; } +}; + +/// The mapping of profile information to coverage data. +/// +/// This is the main interface to get coverage information, using a profile to +/// fill out execution counts. +class CoverageMapping { + DenseMap<size_t, DenseSet<size_t>> RecordProvenance; + std::vector<FunctionRecord> Functions; + DenseMap<size_t, SmallVector<unsigned, 0>> FilenameHash2RecordIndices; + std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches; + + CoverageMapping() = default; + + // Load coverage records from readers. + static Error loadFromReaders( + ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders, + IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage); + + /// Add a function record corresponding to \p Record. + Error loadFunctionRecord(const CoverageMappingRecord &Record, + IndexedInstrProfReader &ProfileReader); + + /// Look up the indices for function records which are at least partially + /// defined in the specified file. This is guaranteed to return a superset of + /// such records: extra records not in the file may be included if there is + /// a hash collision on the filename. Clients must be robust to collisions. + ArrayRef<unsigned> + getImpreciseRecordIndicesForFilename(StringRef Filename) const; + +public: + CoverageMapping(const CoverageMapping &) = delete; + CoverageMapping &operator=(const CoverageMapping &) = delete; + + /// Load the coverage mapping using the given readers. + static Expected<std::unique_ptr<CoverageMapping>> + load(ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders, + IndexedInstrProfReader &ProfileReader); + + /// Load the coverage mapping from the given object files and profile. If + /// \p Arches is non-empty, it must specify an architecture for each object. 
+ /// Ignores non-instrumented object files unless all are not instrumented. + static Expected<std::unique_ptr<CoverageMapping>> + load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename, + ArrayRef<StringRef> Arches = None, StringRef CompilationDir = ""); + + /// The number of functions that couldn't have their profiles mapped. + /// + /// This is a count of functions whose profile is out of date or otherwise + /// can't be associated with any coverage information. + unsigned getMismatchedCount() const { return FuncHashMismatches.size(); } + + /// A hash mismatch occurs when a profile record for a symbol does not have + /// the same hash as a coverage mapping record for the same symbol. This + /// returns a list of hash mismatches, where each mismatch is a pair of the + /// symbol name and its coverage mapping hash. + ArrayRef<std::pair<std::string, uint64_t>> getHashMismatches() const { + return FuncHashMismatches; + } + + /// Returns a lexicographically sorted, unique list of files that are + /// covered. + std::vector<StringRef> getUniqueSourceFiles() const; + + /// Get the coverage for a particular file. + /// + /// The given filename must be the name as recorded in the coverage + /// information. That is, only names returned from getUniqueSourceFiles will + /// yield a result. + CoverageData getCoverageForFile(StringRef Filename) const; + + /// Get the coverage for a particular function. + CoverageData getCoverageForFunction(const FunctionRecord &Function) const; + + /// Get the coverage for an expansion within a coverage set. + CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion) const; + + /// Gets all of the functions covered by this profile. + iterator_range<FunctionRecordIterator> getCoveredFunctions() const { + return make_range(FunctionRecordIterator(Functions), + FunctionRecordIterator()); + } + + /// Gets all of the functions in a particular file. 
+ iterator_range<FunctionRecordIterator> + getCoveredFunctions(StringRef Filename) const { + return make_range(FunctionRecordIterator(Functions, Filename), + FunctionRecordIterator()); + } + + /// Get the list of function instantiation groups in a particular file. + /// + /// Every instantiation group in a program is attributed to exactly one file: + /// the file in which the definition for the common function begins. + std::vector<InstantiationGroup> + getInstantiationGroups(StringRef Filename) const; +}; + +/// Coverage statistics for a single line. +class LineCoverageStats { + uint64_t ExecutionCount; + bool HasMultipleRegions; + bool Mapped; + unsigned Line; + ArrayRef<const CoverageSegment *> LineSegments; + const CoverageSegment *WrappedSegment; + + friend class LineCoverageIterator; + LineCoverageStats() = default; + +public: + LineCoverageStats(ArrayRef<const CoverageSegment *> LineSegments, + const CoverageSegment *WrappedSegment, unsigned Line); + + uint64_t getExecutionCount() const { return ExecutionCount; } + + bool hasMultipleRegions() const { return HasMultipleRegions; } + + bool isMapped() const { return Mapped; } + + unsigned getLine() const { return Line; } + + ArrayRef<const CoverageSegment *> getLineSegments() const { + return LineSegments; + } + + const CoverageSegment *getWrappedSegment() const { return WrappedSegment; } +}; + +/// An iterator over the \c LineCoverageStats objects for lines described by +/// a \c CoverageData instance. 
+class LineCoverageIterator + : public iterator_facade_base<LineCoverageIterator, + std::forward_iterator_tag, + const LineCoverageStats> { +public: + LineCoverageIterator(const CoverageData &CD) + : LineCoverageIterator(CD, CD.begin()->Line) {} + + LineCoverageIterator(const CoverageData &CD, unsigned Line) + : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), + Line(Line) { + this->operator++(); + } + + bool operator==(const LineCoverageIterator &R) const { + return &CD == &R.CD && Next == R.Next && Ended == R.Ended; + } + + const LineCoverageStats &operator*() const { return Stats; } + + LineCoverageIterator &operator++(); + + LineCoverageIterator getEnd() const { + auto EndIt = *this; + EndIt.Next = CD.end(); + EndIt.Ended = true; + return EndIt; + } + +private: + const CoverageData &CD; + const CoverageSegment *WrappedSegment; + std::vector<CoverageSegment>::const_iterator Next; + bool Ended; + unsigned Line; + SmallVector<const CoverageSegment *, 4> Segments; + LineCoverageStats Stats; +}; + +/// Get a \c LineCoverageIterator range for the lines described by \p CD. +static inline iterator_range<LineCoverageIterator> +getLineCoverageStats(const coverage::CoverageData &CD) { + auto Begin = LineCoverageIterator(CD); + auto End = Begin.getEnd(); + return make_range(Begin, End); +} + +// Coverage mappping data (V2) has the following layout: +// IPSK_covmap: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecordV2] +// [CovMapFunctionRecordV2] +// ... +// [ArrayEnd] +// [Encoded Filenames and Region Mapping Data] +// +// Coverage mappping data (V3) has the following layout: +// IPSK_covmap: +// [CoverageMapFileHeader] +// [Encoded Filenames] +// IPSK_covfun: +// [ArrayStart] +// odr_name_1: [CovMapFunctionRecordV3] +// odr_name_2: [CovMapFunctionRecordV3] +// ... 
+// [ArrayEnd] +// +// Both versions of the coverage mapping format encode the same information, +// but the V3 format does so more compactly by taking advantage of linkonce_odr +// semantics (it allows exactly 1 function record per name reference). + +/// This namespace defines accessors shared by different versions of coverage +/// mapping records. +namespace accessors { + +/// Return the structural hash associated with the function. +template <class FuncRecordTy, support::endianness Endian> +uint64_t getFuncHash(const FuncRecordTy *Record) { + return support::endian::byte_swap<uint64_t, Endian>(Record->FuncHash); +} + +/// Return the coverage map data size for the function. +template <class FuncRecordTy, support::endianness Endian> +uint64_t getDataSize(const FuncRecordTy *Record) { + return support::endian::byte_swap<uint32_t, Endian>(Record->DataSize); +} + +/// Return the function lookup key. The value is considered opaque. +template <class FuncRecordTy, support::endianness Endian> +uint64_t getFuncNameRef(const FuncRecordTy *Record) { + return support::endian::byte_swap<uint64_t, Endian>(Record->NameRef); +} + +/// Return the PGO name of the function. Used for formats in which the name is +/// a hash. +template <class FuncRecordTy, support::endianness Endian> +Error getFuncNameViaRef(const FuncRecordTy *Record, + InstrProfSymtab &ProfileNames, StringRef &FuncName) { + uint64_t NameRef = getFuncNameRef<FuncRecordTy, Endian>(Record); + FuncName = ProfileNames.getFuncName(NameRef); + return Error::success(); +} + +/// Read coverage mapping out-of-line, from \p MappingBuf. This is used when the +/// coverage mapping is attached to the file header, instead of to the function +/// record. 
+template <class FuncRecordTy, support::endianness Endian> +StringRef getCoverageMappingOutOfLine(const FuncRecordTy *Record, + const char *MappingBuf) { + return {MappingBuf, size_t(getDataSize<FuncRecordTy, Endian>(Record))}; +} + +/// Advance to the next out-of-line coverage mapping and its associated +/// function record. +template <class FuncRecordTy, support::endianness Endian> +std::pair<const char *, const FuncRecordTy *> +advanceByOneOutOfLine(const FuncRecordTy *Record, const char *MappingBuf) { + return {MappingBuf + getDataSize<FuncRecordTy, Endian>(Record), Record + 1}; +} + +} // end namespace accessors + +LLVM_PACKED_START +template <class IntPtrT> +struct CovMapFunctionRecordV1 { + using ThisT = CovMapFunctionRecordV1<IntPtrT>; + +#define COVMAP_V1 +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +#undef COVMAP_V1 + CovMapFunctionRecordV1() = delete; + + template <support::endianness Endian> uint64_t getFuncHash() const { + return accessors::getFuncHash<ThisT, Endian>(this); + } + + template <support::endianness Endian> uint64_t getDataSize() const { + return accessors::getDataSize<ThisT, Endian>(this); + } + + /// Return function lookup key. The value is consider opaque. + template <support::endianness Endian> IntPtrT getFuncNameRef() const { + return support::endian::byte_swap<IntPtrT, Endian>(NamePtr); + } + + /// Return the PGO name of the function. 
  /// Look up this record's function name in \p ProfileNames.
  ///
  /// The name reference and the byte-swapped NameSize are handed to
  /// InstrProfSymtab::getFuncName. A non-zero size that produces an empty
  /// name is reported as coveragemap_error::malformed.
  template <support::endianness Endian>
  Error getFuncName(InstrProfSymtab &ProfileNames, StringRef &FuncName) const {
    IntPtrT NameRef = getFuncNameRef<Endian>();
    uint32_t NameS = support::endian::byte_swap<uint32_t, Endian>(NameSize);
    FuncName = ProfileNames.getFuncName(NameRef, NameS);
    if (NameS && FuncName.empty())
      return make_error<CoverageMapError>(coveragemap_error::malformed);
    return Error::success();
  }

  /// Forward to the shared out-of-line accessor to step past this record.
  template <support::endianness Endian>
  std::pair<const char *, const ThisT *>
  advanceByOne(const char *MappingBuf) const {
    return accessors::advanceByOneOutOfLine<ThisT, Endian>(this, MappingBuf);
  }

  /// Not available in this record format; reaching this is a programming
  /// error.
  template <support::endianness Endian> uint64_t getFilenamesRef() const {
    llvm_unreachable("V1 function format does not contain a filenames ref");
  }

  /// Forward to the shared out-of-line accessor, which reads the mapping data
  /// from \p MappingBuf.
  template <support::endianness Endian>
  StringRef getCoverageMapping(const char *MappingBuf) const {
    return accessors::getCoverageMappingOutOfLine<ThisT, Endian>(this,
                                                                 MappingBuf);
  }
};

/// Function record whose fields are generated from InstrProfData.inc with
/// COVMAP_V2 defined. Every accessor forwards to the shared helpers in the
/// accessors namespace.
struct CovMapFunctionRecordV2 {
  using ThisT = CovMapFunctionRecordV2;

  // The field list comes from the X-macro include below; COVMAP_V2 selects the
  // variant of the list that is expanded.
#define COVMAP_V2
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
#undef COVMAP_V2
  CovMapFunctionRecordV2() = delete;

  template <support::endianness Endian> uint64_t getFuncHash() const {
    return accessors::getFuncHash<ThisT, Endian>(this);
  }

  template <support::endianness Endian> uint64_t getDataSize() const {
    return accessors::getDataSize<ThisT, Endian>(this);
  }

  template <support::endianness Endian> uint64_t getFuncNameRef() const {
    return accessors::getFuncNameRef<ThisT, Endian>(this);
  }

  template <support::endianness Endian>
  Error getFuncName(InstrProfSymtab &ProfileNames, StringRef &FuncName) const {
    return accessors::getFuncNameViaRef<ThisT, Endian>(this, ProfileNames,
                                                       FuncName);
  }

  template <support::endianness Endian>
  std::pair<const char *, const ThisT *>
  advanceByOne(const char *MappingBuf) const {
    return accessors::advanceByOneOutOfLine<ThisT, Endian>(this, MappingBuf);
  }

  /// Not available in this record format; reaching this is a programming
  /// error.
  template <support::endianness Endian> uint64_t getFilenamesRef() const {
    llvm_unreachable("V2 function format does not contain a filenames ref");
  }

  template <support::endianness Endian>
  StringRef getCoverageMapping(const char *MappingBuf) const {
    return accessors::getCoverageMappingOutOfLine<ThisT, Endian>(this,
                                                                 MappingBuf);
  }
};

/// Function record whose fields are generated from InstrProfData.inc with
/// COVMAP_V3 defined. Unlike the earlier formats, its accessors read the
/// filenames reference and the coverage mapping from the record itself.
struct CovMapFunctionRecordV3 {
  using ThisT = CovMapFunctionRecordV3;

#define COVMAP_V3
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
#undef COVMAP_V3
  CovMapFunctionRecordV3() = delete;

  template <support::endianness Endian> uint64_t getFuncHash() const {
    return accessors::getFuncHash<ThisT, Endian>(this);
  }

  template <support::endianness Endian> uint64_t getDataSize() const {
    return accessors::getDataSize<ThisT, Endian>(this);
  }

  template <support::endianness Endian> uint64_t getFuncNameRef() const {
    return accessors::getFuncNameRef<ThisT, Endian>(this);
  }

  template <support::endianness Endian>
  Error getFuncName(InstrProfSymtab &ProfileNames, StringRef &FuncName) const {
    return accessors::getFuncNameViaRef<ThisT, Endian>(this, ProfileNames,
                                                       FuncName);
  }

  /// Get the filename set reference.
  template <support::endianness Endian> uint64_t getFilenamesRef() const {
    return support::endian::byte_swap<uint64_t, Endian>(FilenamesRef);
  }

  /// Read the inline coverage mapping. Ignore the buffer parameter, it is for
  /// out-of-line coverage mapping data only.
  template <support::endianness Endian>
  StringRef getCoverageMapping(const char *) const {
    return StringRef(&CoverageMapping, getDataSize<Endian>());
  }

  // Advance to the next inline coverage mapping and its associated function
  // record. Ignore the out-of-line coverage mapping buffer.
  template <support::endianness Endian>
  std::pair<const char *, const CovMapFunctionRecordV3 *>
  advanceByOne(const char *) const {
    assert(isAddrAligned(Align(8), this) && "Function record not aligned");
    // sizeof(char) is subtracted so that the CoverageMapping member, whose
    // address is the start of the inline data, is not counted twice.
    const char *Next = ((const char *)this) + sizeof(CovMapFunctionRecordV3) -
                       sizeof(char) + getDataSize<Endian>();
    // Each function record has an alignment of 8, so we need to adjust
    // alignment before reading the next record.
    Next += offsetToAlignedAddr(Next, Align(8));
    // The first element is always null: there is no out-of-line buffer to
    // advance for this format.
    return {nullptr, reinterpret_cast<const CovMapFunctionRecordV3 *>(Next)};
  }
};

// Per module coverage mapping data header, i.e. CoverageMapFileHeader
// documented above.
struct CovMapHeader {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
  // Each accessor returns the corresponding header field after byte-swapping
  // it according to Endian.
  template <support::endianness Endian> uint32_t getNRecords() const {
    return support::endian::byte_swap<uint32_t, Endian>(NRecords);
  }

  template <support::endianness Endian> uint32_t getFilenamesSize() const {
    return support::endian::byte_swap<uint32_t, Endian>(FilenamesSize);
  }

  template <support::endianness Endian> uint32_t getCoverageSize() const {
    return support::endian::byte_swap<uint32_t, Endian>(CoverageSize);
  }

  template <support::endianness Endian> uint32_t getVersion() const {
    return support::endian::byte_swap<uint32_t, Endian>(Version);
  }
};

LLVM_PACKED_END

// Note that the enumerator values are zero-based: VersionN has the value N-1.
enum CovMapVersion {
  Version1 = 0,
  // Function's name reference from CovMapFuncRecord is changed from raw
  // name string pointer to MD5 to support name section compression. Name
  // section is also compressed.
  Version2 = 1,
  // A new interpretation of the columnEnd field is added in order to mark
  // regions as gap areas.
  Version3 = 2,
  // Function records are named, uniqued, and moved to a dedicated section.
  Version4 = 3,
  // Branch regions referring to two counters are added
  Version5 = 4,
  // Compilation directory is stored separately and combined with relative
  // filenames to produce an absolute file path.
  Version6 = 5,
  // The current version is Version6.
  CurrentVersion = INSTR_PROF_COVMAP_VERSION
};

// Maps a coverage mapping version to its function record type and name
// reference type. The primary template is used by every version that has no
// specialization below, and selects CovMapFunctionRecordV3.
template <int CovMapVersion, class IntPtrT> struct CovMapTraits {
  using CovMapFuncRecordType = CovMapFunctionRecordV3;
  using NameRefType = uint64_t;
};

// Version3 keeps the V2 record type.
template <class IntPtrT> struct CovMapTraits<CovMapVersion::Version3, IntPtrT> {
  using CovMapFuncRecordType = CovMapFunctionRecordV2;
  using NameRefType = uint64_t;
};

template <class IntPtrT> struct CovMapTraits<CovMapVersion::Version2, IntPtrT> {
  using CovMapFuncRecordType = CovMapFunctionRecordV2;
  using NameRefType = uint64_t;
};

// Version1 is the only format whose name reference has pointer width.
template <class IntPtrT> struct CovMapTraits<CovMapVersion::Version1, IntPtrT> {
  using CovMapFuncRecordType = CovMapFunctionRecordV1<IntPtrT>;
  using NameRefType = IntPtrT;
};

} // end namespace coverage

/// Provide DenseMapInfo for CounterExpression
template<> struct DenseMapInfo<coverage::CounterExpression> {
  // The empty and tombstone keys both use counter ID ~0U for each operand and
  // differ only in the expression kind (Subtract vs. Add).
  static inline coverage::CounterExpression getEmptyKey() {
    using namespace coverage;

    return CounterExpression(CounterExpression::ExprKind::Subtract,
                             Counter::getCounter(~0U),
                             Counter::getCounter(~0U));
  }

  static inline coverage::CounterExpression getTombstoneKey() {
    using namespace coverage;

    return CounterExpression(CounterExpression::ExprKind::Add,
                             Counter::getCounter(~0U),
                             Counter::getCounter(~0U));
  }

  // Hashes the same five components that isEqual() compares.
  static unsigned getHashValue(const coverage::CounterExpression &V) {
    return static_cast<unsigned>(
        hash_combine(V.Kind, V.LHS.getKind(), V.LHS.getCounterID(),
                     V.RHS.getKind(), V.RHS.getCounterID()));
  }

  static bool isEqual(const coverage::CounterExpression &LHS,
                      const coverage::CounterExpression &RHS) {
    return LHS.Kind == RHS.Kind && LHS.LHS == RHS.LHS && LHS.RHS == RHS.RHS;
  }
};

} //
end namespace llvm + +#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingReader.h new file mode 100644 index 0000000000..1a305e4d98 --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -0,0 +1,256 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- CoverageMappingReader.h - Code coverage mapping reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H +#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ProfileData/Coverage/CoverageMapping.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <memory> +#include <vector> + +namespace llvm { +namespace coverage { + +class CoverageMappingReader; + +/// Coverage mapping information for a single function. 
+struct CoverageMappingRecord { + StringRef FunctionName; + uint64_t FunctionHash; + ArrayRef<StringRef> Filenames; + ArrayRef<CounterExpression> Expressions; + ArrayRef<CounterMappingRegion> MappingRegions; +}; + +/// A file format agnostic iterator over coverage mapping data. +class CoverageMappingIterator { + CoverageMappingReader *Reader; + CoverageMappingRecord Record; + coveragemap_error ReadErr; + + void increment(); + +public: + using iterator_category = std::input_iterator_tag; + using value_type = CoverageMappingRecord; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + + CoverageMappingIterator() + : Reader(nullptr), ReadErr(coveragemap_error::success) {} + + CoverageMappingIterator(CoverageMappingReader *Reader) + : Reader(Reader), ReadErr(coveragemap_error::success) { + increment(); + } + + ~CoverageMappingIterator() { + if (ReadErr != coveragemap_error::success) + llvm_unreachable("Unexpected error in coverage mapping iterator"); + } + + CoverageMappingIterator &operator++() { + increment(); + return *this; + } + bool operator==(const CoverageMappingIterator &RHS) const { + return Reader == RHS.Reader; + } + bool operator!=(const CoverageMappingIterator &RHS) const { + return Reader != RHS.Reader; + } + Expected<CoverageMappingRecord &> operator*() { + if (ReadErr != coveragemap_error::success) { + auto E = make_error<CoverageMapError>(ReadErr); + ReadErr = coveragemap_error::success; + return std::move(E); + } + return Record; + } + Expected<CoverageMappingRecord *> operator->() { + if (ReadErr != coveragemap_error::success) { + auto E = make_error<CoverageMapError>(ReadErr); + ReadErr = coveragemap_error::success; + return std::move(E); + } + return &Record; + } +}; + +class CoverageMappingReader { +public: + virtual ~CoverageMappingReader() = default; + + virtual Error readNextRecord(CoverageMappingRecord &Record) = 0; + CoverageMappingIterator begin() { return 
CoverageMappingIterator(this); } + CoverageMappingIterator end() { return CoverageMappingIterator(); } +}; + +/// Base class for the raw coverage mapping and filenames data readers. +class RawCoverageReader { +protected: + StringRef Data; + + RawCoverageReader(StringRef Data) : Data(Data) {} + + Error readULEB128(uint64_t &Result); + Error readIntMax(uint64_t &Result, uint64_t MaxPlus1); + Error readSize(uint64_t &Result); + Error readString(StringRef &Result); +}; + +/// Checks if the given coverage mapping data is exported for +/// an unused function. +class RawCoverageMappingDummyChecker : public RawCoverageReader { +public: + RawCoverageMappingDummyChecker(StringRef MappingData) + : RawCoverageReader(MappingData) {} + + Expected<bool> isDummy(); +}; + +/// Reader for the raw coverage mapping data. +class RawCoverageMappingReader : public RawCoverageReader { + ArrayRef<std::string> &TranslationUnitFilenames; + std::vector<StringRef> &Filenames; + std::vector<CounterExpression> &Expressions; + std::vector<CounterMappingRegion> &MappingRegions; + +public: + RawCoverageMappingReader(StringRef MappingData, + ArrayRef<std::string> &TranslationUnitFilenames, + std::vector<StringRef> &Filenames, + std::vector<CounterExpression> &Expressions, + std::vector<CounterMappingRegion> &MappingRegions) + : RawCoverageReader(MappingData), + TranslationUnitFilenames(TranslationUnitFilenames), + Filenames(Filenames), Expressions(Expressions), + MappingRegions(MappingRegions) {} + RawCoverageMappingReader(const RawCoverageMappingReader &) = delete; + RawCoverageMappingReader & + operator=(const RawCoverageMappingReader &) = delete; + + Error read(); + +private: + Error decodeCounter(unsigned Value, Counter &C); + Error readCounter(Counter &C); + Error + readMappingRegionsSubArray(std::vector<CounterMappingRegion> &MappingRegions, + unsigned InferredFileID, size_t NumFileIDs); +}; + +/// Reader for the coverage mapping data that is emitted by the +/// frontend and stored in an 
/// object file.
class BinaryCoverageReader : public CoverageMappingReader {
public:
  /// One function's entry as stored by the reader: its version, name, hash,
  /// the encoded coverage mapping, and a (begin, size) pair of filename
  /// indices.
  struct ProfileMappingRecord {
    CovMapVersion Version;
    StringRef FunctionName;
    uint64_t FunctionHash;
    StringRef CoverageMapping;
    size_t FilenamesBegin;
    size_t FilenamesSize;

    ProfileMappingRecord(CovMapVersion Version, StringRef FunctionName,
                         uint64_t FunctionHash, StringRef CoverageMapping,
                         size_t FilenamesBegin, size_t FilenamesSize)
        : Version(Version), FunctionName(FunctionName),
          FunctionHash(FunctionHash), CoverageMapping(CoverageMapping),
          FilenamesBegin(FilenamesBegin), FilenamesSize(FilenamesSize) {}
  };

  using FuncRecordsStorage = std::unique_ptr<MemoryBuffer>;

private:
  std::vector<std::string> Filenames;
  std::vector<ProfileMappingRecord> MappingRecords;
  InstrProfSymtab ProfileNames;
  size_t CurrentRecord = 0;
  std::vector<StringRef> FunctionsFilenames;
  std::vector<CounterExpression> Expressions;
  std::vector<CounterMappingRegion> MappingRegions;

  // Used to tie the lifetimes of coverage function records to the lifetime of
  // this BinaryCoverageReader instance. Needed to support the format change in
  // D69471, which can split up function records into multiple sections on ELF.
  FuncRecordsStorage FuncRecords;

  // Private: instances are obtained through the static factory functions
  // declared below.
  BinaryCoverageReader(FuncRecordsStorage &&FuncRecords)
      : FuncRecords(std::move(FuncRecords)) {}

public:
  BinaryCoverageReader(const BinaryCoverageReader &) = delete;
  BinaryCoverageReader &operator=(const BinaryCoverageReader &) = delete;

  static Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
  create(MemoryBufferRef ObjectBuffer, StringRef Arch,
         SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers,
         StringRef CompilationDir = "");

  static Expected<std::unique_ptr<BinaryCoverageReader>>
  createCoverageReaderFromBuffer(StringRef Coverage,
                                 FuncRecordsStorage &&FuncRecords,
                                 InstrProfSymtab &&ProfileNames,
                                 uint8_t BytesInAddress,
                                 support::endianness Endian,
                                 StringRef CompilationDir = "");

  Error readNextRecord(CoverageMappingRecord &Record) override;
};

/// Reader for the raw coverage filenames.
///
/// The destination vector is held by reference, so it must outlive the reader.
class RawCoverageFilenamesReader : public RawCoverageReader {
  std::vector<std::string> &Filenames;
  StringRef CompilationDir;

  // Read an uncompressed sequence of filenames.
  Error readUncompressed(CovMapVersion Version, uint64_t NumFilenames);

public:
  RawCoverageFilenamesReader(StringRef Data,
                             std::vector<std::string> &Filenames,
                             StringRef CompilationDir = "")
      : RawCoverageReader(Data), Filenames(Filenames),
        CompilationDir(CompilationDir) {}
  RawCoverageFilenamesReader(const RawCoverageFilenamesReader &) = delete;
  RawCoverageFilenamesReader &
  operator=(const RawCoverageFilenamesReader &) = delete;

  Error read(CovMapVersion Version);
};

} // end namespace coverage
} // end namespace llvm

#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
new file mode 100644
index 0000000000..a415d111e3
--- /dev/null
+++ b/contrib/libs/llvm14/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -0,0 +1,72 @@
#pragma once

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//===- CoverageMappingWriter.h - Code coverage mapping writer ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing coverage mapping data for
// instrumentation based coverage.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H
#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/Coverage/CoverageMapping.h"

namespace llvm {

class raw_ostream;

namespace coverage {

/// Writer of the filenames section for the instrumentation
/// based code coverage.
class CoverageFilenamesSectionWriter {
  ArrayRef<std::string> Filenames;

public:
  CoverageFilenamesSectionWriter(ArrayRef<std::string> Filenames);

  /// Write encoded filenames to the given output stream. If \p Compress is
  /// true, attempt to compress the filenames.
  void write(raw_ostream &OS, bool Compress = true);
};

/// Writer for instrumentation based coverage mapping data.
class CoverageMappingWriter {
  ArrayRef<unsigned> VirtualFileMapping;
  ArrayRef<CounterExpression> Expressions;
  // Held as a MutableArrayRef, unlike the two read-only views above.
  MutableArrayRef<CounterMappingRegion> MappingRegions;

public:
  CoverageMappingWriter(ArrayRef<unsigned> VirtualFileMapping,
                        ArrayRef<CounterExpression> Expressions,
                        MutableArrayRef<CounterMappingRegion> MappingRegions)
      : VirtualFileMapping(VirtualFileMapping), Expressions(Expressions),
        MappingRegions(MappingRegions) {}

  /// Write encoded coverage mapping data to the given output stream.
  void write(raw_ostream &OS);
};

} // end namespace coverage

} // end namespace llvm

#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/GCOV.h b/contrib/libs/llvm14/include/llvm/ProfileData/GCOV.h
new file mode 100644
index 0000000000..bd562b7348
--- /dev/null
+++ b/contrib/libs/llvm14/include/llvm/ProfileData/GCOV.h
@@ -0,0 +1,334 @@
#pragma once

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header provides the interface to read and write coverage files that
// use 'gcov' format.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_GCOV_H
#define LLVM_PROFILEDATA_GCOV_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <utility>

namespace llvm {

class GCOVFunction;
class GCOVBlock;

namespace GCOV {

// Enumerators are declared in ascending order, so they can be compared with
// relational operators (e.g. `version >= GCOV::V1200`).
enum GCOVVersion { V304, V407, V408, V800, V900, V1200 };

/// A struct for passing gcov options between functions.
+struct Options { + Options(bool A, bool B, bool C, bool F, bool P, bool U, bool I, bool L, + bool M, bool N, bool R, bool T, bool X, std::string SourcePrefix) + : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), + PreservePaths(P), UncondBranch(U), Intermediate(I), LongFileNames(L), + Demangle(M), NoOutput(N), RelativeOnly(R), UseStdout(T), + HashFilenames(X), SourcePrefix(std::move(SourcePrefix)) {} + + bool AllBlocks; + bool BranchInfo; + bool BranchCount; + bool FuncCoverage; + bool PreservePaths; + bool UncondBranch; + bool Intermediate; + bool LongFileNames; + bool Demangle; + bool NoOutput; + bool RelativeOnly; + bool UseStdout; + bool HashFilenames; + std::string SourcePrefix; +}; + +} // end namespace GCOV + +/// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific +/// read operations. +class GCOVBuffer { +public: + GCOVBuffer(MemoryBuffer *B) : Buffer(B) {} + ~GCOVBuffer() { consumeError(cursor.takeError()); } + + /// readGCNOFormat - Check GCNO signature is valid at the beginning of buffer. + bool readGCNOFormat() { + StringRef buf = Buffer->getBuffer(); + StringRef magic = buf.substr(0, 4); + if (magic == "gcno") { + de = DataExtractor(buf.substr(4), false, 0); + } else if (magic == "oncg") { + de = DataExtractor(buf.substr(4), true, 0); + } else { + errs() << "unexpected magic: " << magic << "\n"; + return false; + } + return true; + } + + /// readGCDAFormat - Check GCDA signature is valid at the beginning of buffer. + bool readGCDAFormat() { + StringRef buf = Buffer->getBuffer(); + StringRef magic = buf.substr(0, 4); + if (magic == "gcda") { + de = DataExtractor(buf.substr(4), false, 0); + } else if (magic == "adcg") { + de = DataExtractor(buf.substr(4), true, 0); + } else { + return false; + } + return true; + } + + /// readGCOVVersion - Read GCOV version. 
+ bool readGCOVVersion(GCOV::GCOVVersion &version) { + std::string str(de.getBytes(cursor, 4)); + if (str.size() != 4) + return false; + if (de.isLittleEndian()) + std::reverse(str.begin(), str.end()); + int ver = str[0] >= 'A' + ? (str[0] - 'A') * 100 + (str[1] - '0') * 10 + str[2] - '0' + : (str[0] - '0') * 10 + str[2] - '0'; + if (ver >= 120) { + this->version = version = GCOV::V1200; + return true; + } else if (ver >= 90) { + // PR gcov-profile/84846, r269678 + this->version = version = GCOV::V900; + return true; + } else if (ver >= 80) { + // PR gcov-profile/48463 + this->version = version = GCOV::V800; + return true; + } else if (ver >= 48) { + // r189778: the exit block moved from the last to the second. + this->version = version = GCOV::V408; + return true; + } else if (ver >= 47) { + // r173147: split checksum into cfg checksum and line checksum. + this->version = version = GCOV::V407; + return true; + } else if (ver >= 34) { + this->version = version = GCOV::V304; + return true; + } + errs() << "unexpected version: " << str << "\n"; + return false; + } + + uint32_t getWord() { return de.getU32(cursor); } + StringRef getString() { + uint32_t len; + if (!readInt(len) || len == 0) + return {}; + return de.getBytes(cursor, len * 4).split('\0').first; + } + + bool readInt(uint32_t &Val) { + if (cursor.tell() + 4 > de.size()) { + Val = 0; + errs() << "unexpected end of memory buffer: " << cursor.tell() << "\n"; + return false; + } + Val = de.getU32(cursor); + return true; + } + + bool readInt64(uint64_t &Val) { + uint32_t Lo, Hi; + if (!readInt(Lo) || !readInt(Hi)) + return false; + Val = ((uint64_t)Hi << 32) | Lo; + return true; + } + + bool readString(StringRef &str) { + uint32_t len; + if (!readInt(len) || len == 0) + return false; + if (version >= GCOV::V1200) + str = de.getBytes(cursor, len).drop_back(); + else + str = de.getBytes(cursor, len * 4).split('\0').first; + return bool(cursor); + } + + DataExtractor de{ArrayRef<uint8_t>{}, false, 0}; + 
DataExtractor::Cursor cursor{0}; + +private: + MemoryBuffer *Buffer; + GCOV::GCOVVersion version{}; +}; + +/// GCOVFile - Collects coverage information for one pair of coverage file +/// (.gcno and .gcda). +class GCOVFile { +public: + GCOVFile() = default; + + bool readGCNO(GCOVBuffer &Buffer); + bool readGCDA(GCOVBuffer &Buffer); + GCOV::GCOVVersion getVersion() const { return version; } + void print(raw_ostream &OS) const; + void dump() const; + + std::vector<std::string> filenames; + StringMap<unsigned> filenameToIdx; + +public: + bool GCNOInitialized = false; + GCOV::GCOVVersion version{}; + uint32_t checksum = 0; + StringRef cwd; + SmallVector<std::unique_ptr<GCOVFunction>, 16> functions; + std::map<uint32_t, GCOVFunction *> identToFunction; + uint32_t runCount = 0; + uint32_t programCount = 0; + + using iterator = pointee_iterator< + SmallVectorImpl<std::unique_ptr<GCOVFunction>>::const_iterator>; + iterator begin() const { return iterator(functions.begin()); } + iterator end() const { return iterator(functions.end()); } +}; + +struct GCOVArc { + GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags) + : src(src), dst(dst), flags(flags) {} + bool onTree() const; + + GCOVBlock &src; + GCOVBlock &dst; + uint32_t flags; + uint64_t count = 0; + uint64_t cycleCount = 0; +}; + +/// GCOVFunction - Collects function information. 
+class GCOVFunction { +public: + using BlockIterator = pointee_iterator< + SmallVectorImpl<std::unique_ptr<GCOVBlock>>::const_iterator>; + + GCOVFunction(GCOVFile &file) : file(file) {} + + StringRef getName(bool demangle) const; + StringRef getFilename() const; + uint64_t getEntryCount() const; + GCOVBlock &getExitBlock() const; + + iterator_range<BlockIterator> blocksRange() const { + return make_range(blocks.begin(), blocks.end()); + } + + uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred); + void print(raw_ostream &OS) const; + void dump() const; + + GCOVFile &file; + uint32_t ident = 0; + uint32_t linenoChecksum; + uint32_t cfgChecksum = 0; + uint32_t startLine = 0; + uint32_t startColumn = 0; + uint32_t endLine = 0; + uint32_t endColumn = 0; + uint8_t artificial = 0; + StringRef Name; + mutable SmallString<0> demangled; + unsigned srcIdx; + SmallVector<std::unique_ptr<GCOVBlock>, 0> blocks; + SmallVector<std::unique_ptr<GCOVArc>, 0> arcs, treeArcs; + DenseSet<const GCOVBlock *> visited; +}; + +/// GCOVBlock - Collects block information. 
class GCOVBlock {
public:
  using EdgeIterator = SmallVectorImpl<GCOVArc *>::const_iterator;
  using BlockVector = SmallVector<const GCOVBlock *, 1>;
  using BlockVectorLists = SmallVector<BlockVector, 4>;
  using Edges = SmallVector<GCOVArc *, 4>;

  GCOVBlock(uint32_t N) : number(N) {}

  void addLine(uint32_t N) { lines.push_back(N); }
  // Precondition: at least one line has been added; back() is called on the
  // line list unconditionally.
  uint32_t getLastLine() const { return lines.back(); }
  uint64_t getCount() const { return count; }

  // "Src" edges are stored in pred and "Dst" edges in succ.
  void addSrcEdge(GCOVArc *Edge) { pred.push_back(Edge); }

  void addDstEdge(GCOVArc *Edge) { succ.push_back(Edge); }

  iterator_range<EdgeIterator> srcs() const {
    return make_range(pred.begin(), pred.end());
  }

  iterator_range<EdgeIterator> dsts() const {
    return make_range(succ.begin(), succ.end());
  }

  void print(raw_ostream &OS) const;
  void dump() const;

  static uint64_t
  augmentOneCycle(GCOVBlock *src,
                  std::vector<std::pair<GCOVBlock *, size_t>> &stack);
  static uint64_t getCyclesCount(const BlockVector &blocks);
  static uint64_t getLineCount(const BlockVector &Blocks);

public:
  uint32_t number;
  uint64_t count = 0;
  SmallVector<GCOVArc *, 2> pred;
  SmallVector<GCOVArc *, 2> succ;
  SmallVector<uint32_t, 4> lines;
  bool traversable = false;
  GCOVArc *incoming = nullptr;
};

void gcovOneInput(const GCOV::Options &options, StringRef filename,
                  StringRef gcno, StringRef gcda, GCOVFile &file);

} // end namespace llvm

#endif // LLVM_PROFILEDATA_GCOV_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/InstrProf.h b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProf.h
new file mode 100644
index 0000000000..002d7c92fc
--- /dev/null
+++ b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProf.h
@@ -0,0 +1,1190 @@
#pragma once

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profiling data is generated by instrumented
// binaries through library functions in compiler-rt, and read by the clang
// frontend to feed PGO.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_INSTRPROF_H
#define LLVM_PROFILEDATA_INSTRPROF_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <list>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

namespace llvm {

class Function;
class GlobalVariable;
struct InstrProfRecord;
class InstrProfSymtab;
class Instruction;
class MDNode;
class Module;

// The enumerators are generated from the INSTR_PROF_SECT_ENTRY list in
// InstrProfData.inc; only the Kind column is used here.
enum InstrProfSectKind {
#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/ProfileData/InstrProfData.inc"
};

/// Return the name of the profile section corresponding to \p IPSK.
///
/// The name of the section depends on the object format type \p OF. If
/// \p AddSegmentInfo is true, a segment prefix and additional linker hints may
/// be added to the section name (this is the default).
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                    Triple::ObjectFormatType OF,
                                    bool AddSegmentInfo = true);

/// Return the name of the profile runtime entry point to do value profiling
/// for a given site.
inline StringRef getInstrProfValueProfFuncName() {
  return INSTR_PROF_VALUE_PROF_FUNC_STR;
}

/// Return the name of the profile runtime entry point to do memop size value
/// profiling.
inline StringRef getInstrProfValueProfMemOpFuncName() {
  return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR;
}

/// Return the name prefix of variables containing instrumented function names.
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }

/// Return the name prefix of variables containing per-function control data.
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }

/// Return the name prefix of profile counter variables.
inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }

/// Return the name prefix of value profile variables.
inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }

/// Return the name of value profile node array variables:
inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }

/// Return the name of the variable holding the strings (possibly compressed)
/// of all function's PGO names.
inline StringRef getInstrProfNamesVarName() {
  return "__llvm_prf_nm";
}

/// Return the name of a coverage mapping variable (internal linkage)
/// for each instrumented source module. Such variables are allocated
/// in the __llvm_covmap section.
inline StringRef getCoverageMappingVarName() {
  return "__llvm_coverage_mapping";
}

/// Return the name of the internal variable recording the array
/// of PGO name vars referenced by the coverage mapping. The owning
/// functions of those names are not emitted by FE (e.g, unused inline
/// functions.)
inline StringRef getCoverageUnusedNamesVarName() {
  return "__llvm_coverage_names";
}

/// Return the name of function that registers all the per-function control
/// data at program startup time by calling __llvm_register_function. This
/// function has internal linkage and is called by  __llvm_profile_init
/// runtime method. This function is not generated for these platforms:
/// Darwin, Linux, and FreeBSD.
inline StringRef getInstrProfRegFuncsName() {
  return "__llvm_profile_register_functions";
}

/// Return the name of the runtime interface that registers per-function control
/// data for one instrumented function.
inline StringRef getInstrProfRegFuncName() {
  return "__llvm_profile_register_function";
}

/// Return the name of the runtime interface that registers the PGO name strings.
inline StringRef getInstrProfNamesRegFuncName() {
  return "__llvm_profile_register_names_function";
}

/// Return the name of the runtime initialization method that is generated by
/// the compiler. The function calls __llvm_profile_register_functions and
/// __llvm_profile_override_default_filename functions if needed. This function
/// has internal linkage and invoked at startup time via init_array.
inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }

/// Return the name of the hook variable defined in profile runtime library.
/// A reference to the variable causes the linker to link in the runtime
/// initialization module (which defines the hook variable).
inline StringRef getInstrProfRuntimeHookVarName() {
  return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
}

/// Return the name of the compiler generated function that references the
/// runtime hook variable. The function is a weak global.
+inline StringRef getInstrProfRuntimeHookVarUseFuncName() { + return "__llvm_profile_runtime_user"; +} + +inline StringRef getInstrProfCounterBiasVarName() { + return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR); +} + +/// Return the marker used to separate PGO names during serialization. +inline StringRef getInstrProfNameSeparator() { return "\01"; } + +/// Return the modified name for function \c F suitable to be +/// used as the key for profile lookup. Variable \c InLTO indicates if this +/// is called in LTO optimization passes. +std::string getPGOFuncName(const Function &F, bool InLTO = false, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the modified name for a function suitable to be +/// used as the key for profile lookup. The function's original +/// name is \c RawFuncName and has linkage of type \c Linkage. +/// The function is defined in module \c FileName. +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the name of the global variable used to store a function +/// name in PGO instrumentation. \c FuncName is the name of the function +/// returned by the \c getPGOFuncName call. +std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c PGOFuncName is the name of the function returned +/// by \c getPGOFuncName call. +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c PGOFuncName is the name of the function +/// returned by \c getPGOFuncName call, \c M is the owning module, +/// and \c Linkage is the linkage of the instrumented function.
+GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef PGOFuncName); + +/// Return the initializer in string of the PGO name var \c NameVar. +StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar); + +/// Given a PGO function name, remove the filename prefix and return +/// the original (static) function name. +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, + StringRef FileName = "<unknown>"); + +/// Given a vector of strings (function PGO names) \c NameStrs, the +/// method generates a combined string \c Result that is ready to be +/// serialized. The \c Result string is comprised of three fields: +/// The first field is the length of the uncompressed strings, and +/// the second field is the length of the zlib-compressed string. +/// Both fields are encoded in ULEB128. If \c doCompression is false, the +/// third field is the uncompressed strings; otherwise it is the +/// compressed string. When the string compression is off, the +/// second field will have value zero. +Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, + bool doCompression, std::string &Result); + +/// Produce \c Result string with the same format described above. The input +/// is a vector of PGO function name variables that are referenced. +Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars, + std::string &Result, bool doCompression = true); + +/// \c NameStrings is a string composed of one or more sub-strings encoded in +/// the format described above. The substrings are separated by 0 or more zero +/// bytes. This method decodes the string and populates the \c Symtab. +Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab); + +/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being +/// set in IR PGO compilation. +bool isIRPGOFlagSet(const Module *M); + +/// Check if we can safely rename this Comdat function.
Instances of the same +/// comdat function may have different control flows thus can not share the +/// same counter variable. +bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false); + +enum InstrProfValueKind : uint32_t { +#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// Get the value profile data for value site \p SiteIdx from \p InstrProfR +/// and annotate the instruction \p Inst with the value profile meta data. +/// Annotate up to \p MaxMDCount (default 3) number of records per value site. +void annotateValueSite(Module &M, Instruction &Inst, + const InstrProfRecord &InstrProfR, + InstrProfValueKind ValueKind, uint32_t SiteIndx, + uint32_t MaxMDCount = 3); + +/// Same as the above interface but using an ArrayRef, as well as \p Sum. +void annotateValueSite(Module &M, Instruction &Inst, + ArrayRef<InstrProfValueData> VDs, uint64_t Sum, + InstrProfValueKind ValueKind, uint32_t MaxMDCount); + +/// Extract the value profile data from \p Inst which is annotated with +/// value profile meta data. Return false if there is no value data annotated, +/// otherwise return true. +bool getValueProfDataFromInst(const Instruction &Inst, + InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC, + bool GetNoICPValue = false); + +inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } + +/// Return the PGOFuncName meta data associated with a function. +MDNode *getPGOFuncNameMetadata(const Function &F); + +/// Create the PGOFuncName meta data if PGOFuncName is different from +/// function's raw name. This should only apply to internal linkage functions +/// declared by users only. +void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); + +/// Check if we can use Comdat for profile variables. 
This will eliminate +/// the duplicated profile variables for Comdat functions. +bool needsComdatForCounter(const Function &F, const Module &M); + +/// An enum describing the attributes of an instrumented profile. +enum class InstrProfKind { + Unknown = 0x0, + FE = 0x1, // A frontend clang profile, incompatible with other attrs. + IR = 0x2, // An IR-level profile (default when -fprofile-generate is used). + BB = 0x4, // A profile with entry basic block instrumentation. + CS = 0x8, // A context sensitive IR-level profile. + SingleByteCoverage = 0x10, // Use single byte probes for coverage. + FunctionEntryOnly = 0x20, // Only instrument the function entry basic block. + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly) +}; + +const std::error_category &instrprof_category(); + +enum class instrprof_error { + success = 0, + eof, + unrecognized_format, + bad_magic, + bad_header, + unsupported_version, + unsupported_hash_type, + too_large, + truncated, + malformed, + missing_debug_info_for_correlation, + unexpected_debug_info_for_correlation, + unable_to_correlate_profile, + unknown_function, + invalid_prof, + hash_mismatch, + count_mismatch, + counter_overflow, + value_site_count_mismatch, + compress_failed, + uncompress_failed, + empty_raw_profile, + zlib_unavailable +}; + +inline std::error_code make_error_code(instrprof_error E) { + return std::error_code(static_cast<int>(E), instrprof_category()); +} + +class InstrProfError : public ErrorInfo<InstrProfError> { +public: + InstrProfError(instrprof_error Err, const Twine &ErrStr = Twine()) + : Err(Err), Msg(ErrStr.str()) { + assert(Err != instrprof_error::success && "Not an error"); + } + + std::string message() const override; + + void log(raw_ostream &OS) const override { OS << message(); } + + std::error_code convertToErrorCode() const override { + return make_error_code(Err); + } + + instrprof_error get() const { return Err; } + const std::string &getMessage() const { return Msg; } + + /// Consume an 
Error and return the raw enum value contained within it. The + /// Error must either be a success value, or contain a single InstrProfError. + static instrprof_error take(Error E) { + auto Err = instrprof_error::success; + handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) { + assert(Err == instrprof_error::success && "Multiple errors encountered"); + Err = IPE.get(); + }); + return Err; + } + + static char ID; + +private: + instrprof_error Err; + std::string Msg; +}; + +class SoftInstrProfErrors { + /// Count the number of soft instrprof_errors encountered and keep track of + /// the first such error for reporting purposes. + + /// The first soft error encountered. + instrprof_error FirstError = instrprof_error::success; + + /// The number of hash mismatches. + unsigned NumHashMismatches = 0; + + /// The number of count mismatches. + unsigned NumCountMismatches = 0; + + /// The number of counter overflows. + unsigned NumCounterOverflows = 0; + + /// The number of value site count mismatches. + unsigned NumValueSiteCountMismatches = 0; + +public: + SoftInstrProfErrors() = default; + + ~SoftInstrProfErrors() { + assert(FirstError == instrprof_error::success && + "Unchecked soft error encountered"); + } + + /// Track a soft error (\p IE) and increment its associated counter. + void addError(instrprof_error IE); + + /// Get the number of hash mismatches. + unsigned getNumHashMismatches() const { return NumHashMismatches; } + + /// Get the number of count mismatches. + unsigned getNumCountMismatches() const { return NumCountMismatches; } + + /// Get the number of counter overflows. + unsigned getNumCounterOverflows() const { return NumCounterOverflows; } + + /// Get the number of value site count mismatches. + unsigned getNumValueSiteCountMismatches() const { + return NumValueSiteCountMismatches; + } + + /// Return the first encountered error and reset FirstError to a success + /// value. 
+ Error takeError() { + if (FirstError == instrprof_error::success) + return Error::success(); + auto E = make_error<InstrProfError>(FirstError); + FirstError = instrprof_error::success; + return E; + } +}; + +namespace object { + +class SectionRef; + +} // end namespace object + +namespace IndexedInstrProf { + +uint64_t ComputeHash(StringRef K); + +} // end namespace IndexedInstrProf + +/// A symbol table used for function PGO name look-up with keys +/// (such as pointers, md5hash values) to the function. A function's +/// PGO name or name's md5hash are used in retrieving the profile +/// data of the function. See \c getPGOFuncName() method for details +/// on how PGO name is formed. +class InstrProfSymtab { +public: + using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>; + +private: + StringRef Data; + uint64_t Address = 0; + // Unique name strings. + StringSet<> NameTab; + // A map from MD5 keys to function name strings. + std::vector<std::pair<uint64_t, StringRef>> MD5NameMap; + // A map from MD5 keys to function definitions. We only populate this map + // when building the Symtab from a Module. + std::vector<std::pair<uint64_t, Function *>> MD5FuncMap; + // A map from function runtime address to function name MD5 hash. + // This map is only populated and used by raw instr profile reader. + AddrHashMap AddrToMD5Map; + bool Sorted = false; + + static StringRef getExternalSymbol() { + return "** External Symbol **"; + } + + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); + +public: + InstrProfSymtab() = default; + + /// Create InstrProfSymtab from an object file section which + /// contains function PGO names. The section may contain raw + /// string data or string data in compressed form.
This method + /// only initializes the symtab with reference to the data and + /// the section base address. The decompression will be delayed + /// until before it is used. See also \c create(StringRef) method. + Error create(object::SectionRef &Section); + + /// This interface is used by reader of CoverageMapping test + /// format. + inline Error create(StringRef D, uint64_t BaseAddr); + + /// \c NameStrings is a string composed of one or more sub-strings + /// encoded in the format described in \c collectPGOFuncNameStrings. + /// This method is a wrapper to \c readPGOFuncNameStrings method. + inline Error create(StringRef NameStrings); + + /// A wrapper interface to populate the PGO symtab with function + /// decls from module \c M. This interface is used by transformation + /// passes such as indirect function call promotion. Variable \c InLTO + /// indicates if this is called from LTO optimization passes. + Error create(Module &M, bool InLTO = false); + + /// Create InstrProfSymtab from a set of names iterable from + /// \p IterRange. This interface is used by IndexedProfReader. + template <typename NameIterRange> Error create(const NameIterRange &IterRange); + + /// Update the symtab by adding \p FuncName to the table. This interface + /// is used by the raw and text profile readers. + Error addFuncName(StringRef FuncName) { + if (FuncName.empty()) + return make_error<InstrProfError>(instrprof_error::malformed, + "function name is empty"); + auto Ins = NameTab.insert(FuncName); + if (Ins.second) { + MD5NameMap.push_back(std::make_pair( + IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + Sorted = false; + } + return Error::success(); + } + + /// Map a function address to its name's MD5 hash. This interface + /// is only used by the raw profiler reader. + void mapAddress(uint64_t Addr, uint64_t MD5Val) { + AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); + } + + /// Return a function's hash, or 0, if the function isn't in this SymTab.
+ uint64_t getFunctionHashFromAddress(uint64_t Address); + + /// Return function's PGO name from the function name's symbol + /// address in the object file. If an error occurs, return + /// an empty string. + StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); + + /// Return function's PGO name from the name's md5 hash value. + /// If not found, return an empty string. + inline StringRef getFuncName(uint64_t FuncMD5Hash); + + /// Just like getFuncName, except that it will return a non-empty StringRef + /// if the function is external to this symbol table. All such cases + /// will be represented using the same StringRef value. + inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); + + /// True if Symbol is the value used to represent external symbols. + static bool isExternalSymbol(const StringRef &Symbol) { + return Symbol == InstrProfSymtab::getExternalSymbol(); + } + + /// Return function from the name's md5 hash. Return nullptr if not found. + inline Function *getFunction(uint64_t FuncMD5Hash); + + /// Return the function's original assembly name by stripping off + /// the prefix attached (to symbols with private linkage). For + /// global functions, it returns the same string as getFuncName. + inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); + + /// Return the name section data. + inline StringRef getNameData() const { return Data; } + + /// Dump the symbols in this table.
+ void dumpNames(raw_ostream &OS) const { + for (StringRef S : NameTab.keys()) + OS << S << "\n"; + } +}; + +Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { + Data = D; + Address = BaseAddr; + return Error::success(); +} + +Error InstrProfSymtab::create(StringRef NameStrings) { + return readPGOFuncNameStrings(NameStrings, *this); +} + +template <typename NameIterRange> +Error InstrProfSymtab::create(const NameIterRange &IterRange) { + for (auto Name : IterRange) + if (Error E = addFuncName(Name)) + return E; + + finalizeSymtab(); + return Error::success(); +} + +void InstrProfSymtab::finalizeSymtab() { + if (Sorted) + return; + llvm::sort(MD5NameMap, less_first()); + llvm::sort(MD5FuncMap, less_first()); + llvm::sort(AddrToMD5Map, less_first()); + AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), + AddrToMD5Map.end()); + Sorted = true; +} + +StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { + StringRef ret = getFuncName(FuncMD5Hash); + if (ret.empty()) + return InstrProfSymtab::getExternalSymbol(); + return ret; +} + +StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + finalizeSymtab(); + auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash, + [](const std::pair<uint64_t, StringRef> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) + return Result->second; + return StringRef(); +} + +Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { + finalizeSymtab(); + auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash, + [](const std::pair<uint64_t, Function *> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) + return Result->second; + return nullptr; +} + +// See also getPGOFuncName implementation. These two need to be +// matched. 
+StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { + StringRef PGOName = getFuncName(FuncMD5Hash); + size_t S = PGOName.find_first_of(':'); + if (S == StringRef::npos) + return PGOName; + return PGOName.drop_front(S + 1); +} + +// To store the sums of profile count values, or the percentage of +// the sums of the total count values. +struct CountSumOrPercent { + uint64_t NumEntries; + double CountSum; + double ValueCounts[IPVK_Last - IPVK_First + 1]; + CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {} + void reset() { + NumEntries = 0; + CountSum = 0.0f; + for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) + ValueCounts[I] = 0.0f; + } +}; + +// Function level or program level overlap information. +struct OverlapStats { + enum OverlapStatsLevel { ProgramLevel, FunctionLevel }; + // Sum of the total count values for the base profile. + CountSumOrPercent Base; + // Sum of the total count values for the test profile. + CountSumOrPercent Test; + // Overlap score. Should be in the range [0.0f, 1.0f].
+ CountSumOrPercent Overlap; + CountSumOrPercent Mismatch; + CountSumOrPercent Unique; + OverlapStatsLevel Level; + const std::string *BaseFilename; + const std::string *TestFilename; + StringRef FuncName; + uint64_t FuncHash; + bool Valid; + + OverlapStats(OverlapStatsLevel L = ProgramLevel) + : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0), + Valid(false) {} + + void dump(raw_fd_ostream &OS) const; + + void setFuncInfo(StringRef Name, uint64_t Hash) { + FuncName = Name; + FuncHash = Hash; + } + + Error accumulateCounts(const std::string &BaseFilename, + const std::string &TestFilename, bool IsCS); + void addOneMismatch(const CountSumOrPercent &MismatchFunc); + void addOneUnique(const CountSumOrPercent &UniqueFunc); + + static inline double score(uint64_t Val1, uint64_t Val2, double Sum1, + double Sum2) { + if (Sum1 < 1.0f || Sum2 < 1.0f) + return 0.0f; + return std::min(Val1 / Sum1, Val2 / Sum2); + } +}; + +// This is used to filter the functions whose overlap information +// to be output. +struct OverlapFuncFilters { + uint64_t ValueCutoff; + const std::string NameFilter; +}; + +struct InstrProfValueSiteRecord { + /// Value profiling data pairs at a given value site. + std::list<InstrProfValueData> ValueData; + + InstrProfValueSiteRecord() { ValueData.clear(); } + template <class InputIterator> + InstrProfValueSiteRecord(InputIterator F, InputIterator L) + : ValueData(F, L) {} + + /// Sort ValueData ascending by Value + void sortByTargetValues() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Value < right.Value; + }); + } + /// Sort ValueData Descending by Count + inline void sortByCount(); + + /// Merge data from another InstrProfValueSiteRecord + /// Optionally scale merged counts by \p Weight. + void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + /// Scale up value profile data counts by N (Numerator) / D (Denominator). 
+ void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); + + /// Compute the overlap b/w this record and Input record. + void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, + OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); +}; + +/// Profiling information for a single function. +struct InstrProfRecord { + std::vector<uint64_t> Counts; + + InstrProfRecord() = default; + InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {} + InstrProfRecord(InstrProfRecord &&) = default; + InstrProfRecord(const InstrProfRecord &RHS) + : Counts(RHS.Counts), + ValueData(RHS.ValueData + ? std::make_unique<ValueProfData>(*RHS.ValueData) + : nullptr) {} + InstrProfRecord &operator=(InstrProfRecord &&) = default; + InstrProfRecord &operator=(const InstrProfRecord &RHS) { + Counts = RHS.Counts; + if (!RHS.ValueData) { + ValueData = nullptr; + return *this; + } + if (!ValueData) + ValueData = std::make_unique<ValueProfData>(*RHS.ValueData); + else + *ValueData = *RHS.ValueData; + return *this; + } + + /// Return the number of value profile kinds with non-zero number + /// of profile sites. + inline uint32_t getNumValueKinds() const; + /// Return the number of instrumented sites for ValueKind. + inline uint32_t getNumValueSites(uint32_t ValueKind) const; + + /// Return the total number of ValueData for ValueKind. + inline uint32_t getNumValueData(uint32_t ValueKind) const; + + /// Return the number of value data collected for ValueKind at profiling + /// site: Site. + inline uint32_t getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const; + + /// Return the array of profiled values at \p Site. If \p TotalC + /// is not null, the total count of all target values at this site + /// will be stored in \c *TotalC. 
+ inline std::unique_ptr<InstrProfValueData[]> + getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t *TotalC = nullptr) const; + + /// Get the target value/counts of kind \p ValueKind collected at site + /// \p Site and store the result in array \p Dest. Return the total + /// counts of all target values at this site. + inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, + uint32_t Site) const; + + /// Reserve space for NumValueSites sites. + inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); + + /// Add ValueData for ValueKind at value Site. + void addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + InstrProfSymtab *SymTab); + + /// Merge the counts in \p Other into this one. + /// Optionally scale merged counts by \p Weight. + void merge(InstrProfRecord &Other, uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + + /// Scale up profile counts (including value profile data) by + /// a factor of (N / D). + void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); + + /// Sort value profile data (per site) by count. + void sortValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + for (auto &SR : getValueSitesForKind(Kind)) + SR.sortByCount(); + } + + /// Clear value data entries and edge counters. + void Clear() { + Counts.clear(); + clearValueData(); + } + + /// Clear value data entries. + void clearValueData() { ValueData = nullptr; } + + /// Compute the sums of all counts and store in Sum. + void accumulateCounts(CountSumOrPercent &Sum) const; + + /// Compute the overlap between this InstrProfRecord and Other. + void overlap(InstrProfRecord &Other, OverlapStats &Overlap, + OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff); + + /// Compute the overlap of value profile counts.
+ void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src, + OverlapStats &Overlap, + OverlapStats &FuncLevelOverlap); + +private: + struct ValueProfData { + std::vector<InstrProfValueSiteRecord> IndirectCallSites; + std::vector<InstrProfValueSiteRecord> MemOPSizes; + }; + std::unique_ptr<ValueProfData> ValueData; + + MutableArrayRef<InstrProfValueSiteRecord> + getValueSitesForKind(uint32_t ValueKind) { + // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever + // implemented in LLVM) to call the const overload of this function, then + // cast away the constness from the result. + auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind( + ValueKind); + return makeMutableArrayRef( + const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size()); + } + ArrayRef<InstrProfValueSiteRecord> + getValueSitesForKind(uint32_t ValueKind) const { + if (!ValueData) + return None; + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return ValueData->IndirectCallSites; + case IPVK_MemOPSize: + return ValueData->MemOPSizes; + default: + llvm_unreachable("Unknown value kind!"); + } + } + + std::vector<InstrProfValueSiteRecord> & + getOrCreateValueSitesForKind(uint32_t ValueKind) { + if (!ValueData) + ValueData = std::make_unique<ValueProfData>(); + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return ValueData->IndirectCallSites; + case IPVK_MemOPSize: + return ValueData->MemOPSizes; + default: + llvm_unreachable("Unknown value kind!"); + } + } + + // Map indirect call target name hash to name string. + uint64_t remapValue(uint64_t Value, uint32_t ValueKind, + InstrProfSymtab *SymTab); + + // Merge Value Profile data from Src record to this record for ValueKind. + // Scale merged value counts by \p Weight. + void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src, + uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + + // Scale up value profile data count by N (Numerator) / D (Denominator). 
+ void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, + function_ref<void(instrprof_error)> Warn); +}; + +struct NamedInstrProfRecord : InstrProfRecord { + StringRef Name; + uint64_t Hash; + + // We reserve this bit as the flag for context sensitive profile record. + static const int CS_FLAG_IN_FUNC_HASH = 60; + + NamedInstrProfRecord() = default; + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector<uint64_t> Counts) + : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } +}; + +uint32_t InstrProfRecord::getNumValueKinds() const { + uint32_t NumValueKinds = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NumValueKinds += !(getValueSitesForKind(Kind).empty()); + return NumValueKinds; +} + +uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { + uint32_t N = 0; + for (auto &SR : getValueSitesForKind(ValueKind)) + N += SR.ValueData.size(); + return N; +} + +uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { + return getValueSitesForKind(ValueKind).size(); +} + +uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const { + return getValueSitesForKind(ValueKind)[Site].ValueData.size(); +} + +std::unique_ptr<InstrProfValueData[]> +InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t *TotalC) const { + uint64_t Dummy = 0; + uint64_t &TotalCount = (TotalC == nullptr ? 
Dummy : *TotalC); + uint32_t N = getNumValueDataForSite(ValueKind, Site); + if (N == 0) { + TotalCount = 0; + return std::unique_ptr<InstrProfValueData[]>(nullptr); + } + + auto VD = std::make_unique<InstrProfValueData[]>(N); + TotalCount = getValueForSite(VD.get(), ValueKind, Site); + + return VD; +} + +uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], + uint32_t ValueKind, + uint32_t Site) const { + uint32_t I = 0; + uint64_t TotalCount = 0; + for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { + Dest[I].Value = V.Value; + Dest[I].Count = V.Count; + TotalCount = SaturatingAdd(TotalCount, V.Count); + I++; + } + return TotalCount; +} + +void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { + if (!NumValueSites) + return; + getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites); +} + +inline support::endianness getHostEndianness() { + return sys::IsLittleEndianHost ? support::little : support::big; +} + +// Include definitions for value profile data +#define INSTR_PROF_VALUE_PROF_DATA +#include "llvm/ProfileData/InstrProfData.inc" + +void InstrProfValueSiteRecord::sortByCount() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Count > right.Count; + }); + // Now truncate + size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; + if (ValueData.size() > max_s) + ValueData.resize(max_s); +} + +namespace IndexedInstrProf { + +enum class HashT : uint32_t { + MD5, + Last = MD5 +}; + +inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" + +enum ProfVersion { + // Version 1 is the first version. In this version, the value of + // a key/value pair can only include profile data of a single function. 
+ // Due to this restriction, the number of block counters for a given + // function is not recorded but derived from the length of the value. + Version1 = 1, + // The version 2 format supports recording profile data of multiple + // functions which share the same key in one value field. To support this, + // the number of block counters is recorded as a uint64_t field right after the + // function structural hash. + Version2 = 2, + // Version 3 supports value profile data. The value profile data is expected + // to follow the block counter profile data. + Version3 = 3, + // In this version, profile summary data \c IndexedInstrProf::Summary is + // stored after the profile header. + Version4 = 4, + // In this version, the frontend PGO stable hash algorithm defaults to V2. + Version5 = 5, + // In this version, the frontend PGO stable hash algorithm got fixed and + // may produce hashes different from Version5. + Version6 = 6, + // An additional counter is added around logical operators. + Version7 = 7, + // The current version is 7. + CurrentVersion = INSTR_PROF_INDEX_VERSION +}; +const uint64_t Version = ProfVersion::CurrentVersion; + +const HashT HashType = HashT::MD5; + +inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } + +// This structure defines the file header of the LLVM profile +// data file in indexed-format. +struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t Unused; // Becomes unused since version 4 + uint64_t HashType; + uint64_t HashOffset; +}; + +// Profile summary data recorded in the profile data file in indexed +// format. It is introduced in version 4. The summary data follows +// right after the profile file header. +struct Summary { + struct Entry { + uint64_t Cutoff; ///< The required percentile of total execution count. + uint64_t + MinBlockCount; ///< The minimum execution count for this percentile. + uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count.
+ }; + // The field kind enumerator to assigned value mapping should remain + // unchanged when a new kind is added or an old kind gets deleted in + // the future. + enum SummaryFieldKind { + /// The total number of functions instrumented. + TotalNumFunctions = 0, + /// Total number of instrumented blocks/edges. + TotalNumBlocks = 1, + /// The maximal execution count among all functions. + /// This field does not exist for profile data from IR based + /// instrumentation. + MaxFunctionCount = 2, + /// Max block count of the program. + MaxBlockCount = 3, + /// Max internal block count of the program (excluding entry blocks). + MaxInternalBlockCount = 4, + /// The sum of all instrumented block counts. + TotalBlockCount = 5, + NumKinds = TotalBlockCount + 1 + }; + + // The number of summary fields following the summary header. + uint64_t NumSummaryFields; + // The number of Cutoff Entries (Summary::Entry) following summary fields. + uint64_t NumCutoffEntries; + + Summary() = delete; + Summary(uint32_t Size) { memset(this, 0, Size); } + + void operator delete(void *ptr) { ::operator delete(ptr); } + + static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { + return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + + NumSumFields * sizeof(uint64_t); + } + + const uint64_t *getSummaryDataBase() const { + return reinterpret_cast<const uint64_t *>(this + 1); + } + + uint64_t *getSummaryDataBase() { + return reinterpret_cast<uint64_t *>(this + 1); + } + + const Entry *getCutoffEntryBase() const { + return reinterpret_cast<const Entry *>( + &getSummaryDataBase()[NumSummaryFields]); + } + + Entry *getCutoffEntryBase() { + return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]); + } + + uint64_t get(SummaryFieldKind K) const { + return getSummaryDataBase()[K]; + } + + void set(SummaryFieldKind K, uint64_t V) { + getSummaryDataBase()[K] = V; + } + + const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } + + void 
setEntry(uint32_t I, const ProfileSummaryEntry &E) { + Entry &ER = getCutoffEntryBase()[I]; + ER.Cutoff = E.Cutoff; + ER.MinBlockCount = E.MinCount; + ER.NumBlocks = E.NumCounts; + } +}; + +inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) { + return std::unique_ptr<Summary>(new (::operator new(TotalSize)) + Summary(TotalSize)); +} + +} // end namespace IndexedInstrProf + +namespace RawInstrProf { + +// Version 1: First version +// Version 2: Added value profile data section. Per-function control data +// struct has more fields to describe value profile information. +// Version 3: Compressed name section support. Function PGO name reference +// from control data struct is changed from raw pointer to Name's MD5 value. +// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the +// raw header. +// Version 5: Bit 60 of FuncHash is reserved for the flag for the context +// sensitive records. +// Version 6: Added binary id. +// Version 7: Reorder binary id and include version in signature. +// Version 8: Use relative counter pointer. +const uint64_t Version = INSTR_PROF_RAW_VERSION; + +template <class IntPtrT> inline uint64_t getMagic(); +template <> inline uint64_t getMagic<uint64_t>() { + return INSTR_PROF_RAW_MAGIC_64; +} + +template <> inline uint64_t getMagic<uint32_t>() { + return INSTR_PROF_RAW_MAGIC_32; +} + +// Per-function profile data header/control structure. +// The definition should match the structure defined in +// compiler-rt/lib/profile/InstrProfiling.h. +// It should also match the synthesized type in +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. +template <class IntPtrT> struct alignas(8) ProfileData { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; + +// File header structure of the LLVM profile data in raw format. 
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c and
// InstrProfilingBuffer.c.
struct Header {
// Each INSTR_PROF_RAW_HEADER entry in InstrProfData.inc expands to one
// const member, in the order listed there.
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

} // end namespace RawInstrProf

// Parse MemOP Size range option.
// The parsed bounds are returned through \p RangeStart and \p RangeLast.
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                 int64_t &RangeLast);

// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);

// Whether to compress function names in profile records, and filenames in
// code coverage mappings. Used by the Instrumentation library and unit tests.
extern cl::opt<bool> DoInstrProfNameCompression;

} // end namespace llvm
#endif // LLVM_PROFILEDATA_INSTRPROF_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfCorrelator.h b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfCorrelator.h
new file mode 100644
index 0000000000..1a341ce53d
--- /dev/null
+++ b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -0,0 +1,189 @@
#pragma once

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//===- InstrProfCorrelator.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines InstrProfCorrelator used to generate PGO profiles from
// raw profile data and debug info.
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H +#define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include <vector> + +namespace llvm { + +/// InstrProfCorrelator - A base class used to create raw instrumentation data +/// to their functions. +class InstrProfCorrelator { +public: + static llvm::Expected<std::unique_ptr<InstrProfCorrelator>> + get(StringRef DebugInfoFilename); + + /// Construct a ProfileData vector used to correlate raw instrumentation data + /// to their functions. + virtual Error correlateProfileData() = 0; + + /// Return the number of ProfileData elements. + llvm::Optional<size_t> getDataSize() const; + + /// Return a pointer to the names string that this class constructs. + const char *getNamesPointer() const { return Names.c_str(); } + + /// Return the number of bytes in the names string. + size_t getNamesSize() const { return Names.size(); } + + /// Return the size of the counters section in bytes. + uint64_t getCountersSectionSize() const { + return Ctx->CountersSectionEnd - Ctx->CountersSectionStart; + } + + static const char *FunctionNameAttributeName; + static const char *CFGHashAttributeName; + static const char *NumCountersAttributeName; + + enum InstrProfCorrelatorKind { CK_32Bit, CK_64Bit }; + InstrProfCorrelatorKind getKind() const { return Kind; } + virtual ~InstrProfCorrelator() = default; + +protected: + struct Context { + static llvm::Expected<std::unique_ptr<Context>> + get(std::unique_ptr<MemoryBuffer> Buffer, const object::ObjectFile &Obj); + std::unique_ptr<MemoryBuffer> Buffer; + /// The address range of the __llvm_prf_cnts section. 
+ uint64_t CountersSectionStart; + uint64_t CountersSectionEnd; + /// True if target and host have different endian orders. + bool ShouldSwapBytes; + }; + const std::unique_ptr<InstrProfCorrelator::Context> Ctx; + + InstrProfCorrelator(InstrProfCorrelatorKind K, std::unique_ptr<Context> Ctx) + : Ctx(std::move(Ctx)), Kind(K) {} + + std::string Names; + std::vector<std::string> NamesVec; + +private: + static llvm::Expected<std::unique_ptr<InstrProfCorrelator>> + get(std::unique_ptr<MemoryBuffer> Buffer); + + const InstrProfCorrelatorKind Kind; +}; + +/// InstrProfCorrelatorImpl - A child of InstrProfCorrelator with a template +/// pointer type so that the ProfileData vector can be materialized. +template <class IntPtrT> +class InstrProfCorrelatorImpl : public InstrProfCorrelator { +public: + InstrProfCorrelatorImpl(std::unique_ptr<InstrProfCorrelator::Context> Ctx); + static bool classof(const InstrProfCorrelator *C); + + /// Return a pointer to the underlying ProfileData vector that this class + /// constructs. + const RawInstrProf::ProfileData<IntPtrT> *getDataPointer() const { + return Data.empty() ? nullptr : Data.data(); + } + + /// Return the number of ProfileData elements. + size_t getDataSize() const { return Data.size(); } + + static llvm::Expected<std::unique_ptr<InstrProfCorrelatorImpl<IntPtrT>>> + get(std::unique_ptr<InstrProfCorrelator::Context> Ctx, + const object::ObjectFile &Obj); + +protected: + std::vector<RawInstrProf::ProfileData<IntPtrT>> Data; + + Error correlateProfileData() override; + virtual void correlateProfileDataImpl() = 0; + + void addProbe(StringRef FunctionName, uint64_t CFGHash, IntPtrT CounterOffset, + IntPtrT FunctionPtr, uint32_t NumCounters); + +private: + InstrProfCorrelatorImpl(InstrProfCorrelatorKind Kind, + std::unique_ptr<InstrProfCorrelator::Context> Ctx) + : InstrProfCorrelator(Kind, std::move(Ctx)){}; + llvm::DenseSet<IntPtrT> CounterOffsets; + + // Byte-swap the value if necessary. 
+ template <class T> T maybeSwap(T Value) const { + return Ctx->ShouldSwapBytes ? sys::getSwappedBytes(Value) : Value; + } +}; + +/// DwarfInstrProfCorrelator - A child of InstrProfCorrelatorImpl that takes +/// DWARF debug info as input to correlate profiles. +template <class IntPtrT> +class DwarfInstrProfCorrelator : public InstrProfCorrelatorImpl<IntPtrT> { +public: + DwarfInstrProfCorrelator(std::unique_ptr<DWARFContext> DICtx, + std::unique_ptr<InstrProfCorrelator::Context> Ctx) + : InstrProfCorrelatorImpl<IntPtrT>(std::move(Ctx)), + DICtx(std::move(DICtx)) {} + +private: + std::unique_ptr<DWARFContext> DICtx; + + /// Return the address of the object that the provided DIE symbolizes. + llvm::Optional<uint64_t> getLocation(const DWARFDie &Die) const; + + /// Returns true if the provided DIE symbolizes an instrumentation probe + /// symbol. + static bool isDIEOfProbe(const DWARFDie &Die); + + /// Iterate over DWARF DIEs to find those that symbolize instrumentation + /// probes and construct the ProfileData vector and Names string. 
+ /// + /// Here is some example DWARF for an instrumentation probe we are looking + /// for: + /// \code + /// DW_TAG_subprogram + /// DW_AT_low_pc (0x0000000000000000) + /// DW_AT_high_pc (0x0000000000000014) + /// DW_AT_name ("foo") + /// DW_TAG_variable + /// DW_AT_name ("__profc_foo") + /// DW_AT_location (DW_OP_addr 0x0) + /// DW_TAG_LLVM_annotation + /// DW_AT_name ("Function Name") + /// DW_AT_const_value ("foo") + /// DW_TAG_LLVM_annotation + /// DW_AT_name ("CFG Hash") + /// DW_AT_const_value (12345678) + /// DW_TAG_LLVM_annotation + /// DW_AT_name ("Num Counters") + /// DW_AT_const_value (2) + /// NULL + /// NULL + /// \endcode + void correlateProfileDataImpl() override; +}; + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfData.inc b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfData.inc new file mode 100644 index 0000000000..62054a6a3d --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfData.inc @@ -0,0 +1,905 @@ +/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * compiler (instrumentation), and host tools (reader/writer). The entities + * defined in this file affect the profile runtime ABI, the raw profile format, + * or both. + * + * The file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/lib/profile directory. 
 * To make changes
 * in this file, first modify the primary copy and copy it over to compiler-rt.
 * Testing of any change in this file can start only after the two copies are
 * synced up.
 *
 * The first part of the file includes macros that define types, names, and
 * initializers for the member fields of the core data structures. The field
 * declarations for one structure are enabled by defining the field activation
 * macro associated with that structure. Only one field activation macro
 * can be defined at one time and the remaining definitions will be filtered
 * out by the preprocessor.
 *
 * Examples of how the template is used to instantiate structure definition:
 * 1. To declare a structure:
 *
 * struct ProfData {
 * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
 *    Type Name;
 * #include "llvm/ProfileData/InstrProfData.inc"
 * };
 *
 * 2. To construct LLVM type arrays for the struct type:
 *
 * Type *DataTypes[] = {
 * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
 *   LLVMType,
 * #include "llvm/ProfileData/InstrProfData.inc"
 * };
 *
 * 3. To construct constant array for the initializers:
 * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
 *   Initializer,
 * Constant *ConstantVals[] = {
 * #include "llvm/ProfileData/InstrProfData.inc"
 * };
 *
 *
 * The second part of the file includes definitions of all other entities that
 * are related to runtime ABI and format. When no field activation macro is
 * defined, this file can be included to introduce the definitions.
 *
\*===----------------------------------------------------------------------===*/

/* Functions marked with INSTR_PROF_VISIBILITY must have hidden visibility in
 * the compiler runtime. */
/* Defaults to empty when the includer has not defined it. */
#ifndef INSTR_PROF_VISIBILITY
#define INSTR_PROF_VISIBILITY
#endif

/* INSTR_PROF_DATA start. */
/* Definition of member fields of the per-function control structure.
 */
/* If the includer did not activate INSTR_PROF_DATA, define it as empty so the
 * entries below expand to nothing. Otherwise record that a field activation
 * macro was in use via INSTR_PROF_DATA_DEFINED. */
#ifndef INSTR_PROF_DATA
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
/* NameRef: initialized with IndexedInstrProf::ComputeHash of the function's
 * PGO name variable initializer. */
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
                ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
                IndexedInstrProf::ComputeHash(getPGOFuncNameVarInitializer(Inc->getName()))))
/* FuncHash: initialized from Inc->getHash(). */
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
                ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
                Inc->getHash()->getZExtValue()))
/* CounterPtr: initialized from RelativeCounterPtr. */
INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
/* This is used to map function pointers for the indirect call targets to
 * function name hashes during the conversion from raw to merged profile
 * data.
 */
INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \
                FunctionAddr)
/* Values: the only field of this structure not declared const. */
INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \
                ValuesPtrExpr)
INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \
                ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters))
/* NumValueSites: one uint16_t per value kind (IPVK_Last + 1 elements). */
INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
                ConstantArray::get(Int16ArrayTy, Int16ArrayVals))
#undef INSTR_PROF_DATA
/* INSTR_PROF_DATA end. */


/* This is an internal data structure used by value profiler. It
 * is defined here to allow serialization code sharing by LLVM
 * to be used in unit test.
 *
 * typedef struct ValueProfNode {
 *   // InstrProfValueData VData;
 *   uint64_t Value;
 *   uint64_t Count;
 *   struct ValueProfNode *Next;
 * } ValueProfNode;
 */
/* INSTR_PROF_VALUE_NODE start.
*/ +#ifndef INSTR_PROF_VALUE_NODE +#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Value, \ + ConstantInt::get(llvm::Type::GetInt64Ty(Ctx), 0)) +INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Count, \ + ConstantInt::get(llvm::Type::GetInt64Ty(Ctx), 0)) +INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ + ConstantInt::get(llvm::Type::GetInt8PtrTy(Ctx), 0)) +#undef INSTR_PROF_VALUE_NODE +/* INSTR_PROF_VALUE_NODE end. */ + +/* INSTR_PROF_RAW_HEADER start */ +/* Definition of member fields of the raw profile header data structure. */ +#ifndef INSTR_PROF_RAW_HEADER +#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) +/* FIXME: A more accurate name is NumData */ +INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) +/* FIXME: A more accurate name is NumCounters */ +INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) +INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, + (uintptr_t)CountersBegin - (uintptr_t)DataBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) +#undef INSTR_PROF_RAW_HEADER +/* INSTR_PROF_RAW_HEADER end */ + +/* VALUE_PROF_FUNC_PARAM start */ +/* Definition of parameter types of the runtime API used to do value profiling + * for a given value site. 
 */
/* INSTR_PROF_COMMA separates the parameters: it is empty when the list is
 * inactive and a literal ',' when the includer defined
 * VALUE_PROF_FUNC_PARAM. */
#ifndef VALUE_PROF_FUNC_PARAM
#define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType)
#define INSTR_PROF_COMMA
#else
#define INSTR_PROF_DATA_DEFINED
#define INSTR_PROF_COMMA ,
#endif
VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \
                      INSTR_PROF_COMMA
VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA
VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
#undef VALUE_PROF_FUNC_PARAM
#undef INSTR_PROF_COMMA
/* VALUE_PROF_FUNC_PARAM end */

/* VALUE_PROF_KIND start */
#ifndef VALUE_PROF_KIND
#define VALUE_PROF_KIND(Enumerator, Value, Descr)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
/* For indirect function call value profiling, the addresses of the target
 * functions are profiled by the instrumented code. The target addresses are
 * written in the raw profile data and converted to target function name's MD5
 * hash by the profile reader during deserialization.  Typically, this happens
 * when the raw profile data is read during profile merging.
 *
 * For this remapping the ProfData is used.  ProfData contains both the function
 * name hash and the function address.
 */
VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
/* For memory intrinsic functions size profiling. */
VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
/* These two kinds must be the last to be
 * declared. This is to make sure the string
 * array created with the template can be
 * indexed with the kind value.
 */
/* IPVK_First and IPVK_Last alias the lowest and highest real kind above. */
VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")

#undef VALUE_PROF_KIND
/* VALUE_PROF_KIND end */

/* COVMAP_V2_OR_V3 is defined when the includer defined COVMAP_V2 or
 * COVMAP_V3. It is reset first so a stale definition from an earlier include
 * does not leak into this one. */
#undef COVMAP_V2_OR_V3
#ifdef COVMAP_V2
#define COVMAP_V2_OR_V3
#endif
#ifdef COVMAP_V3
#define COVMAP_V2_OR_V3
#endif

/* COVMAP_FUNC_RECORD start */
/* Definition of member fields of the function record structure in coverage
 * map.
 */
/* The set of fields depends on which of COVMAP_V1, COVMAP_V2 and COVMAP_V3
 * the includer defined; DataSize and FuncHash are emitted unconditionally. */
#ifndef COVMAP_FUNC_RECORD
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Initializer)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
/* COVMAP_V1 only: the function name is stored as a pointer plus a size. */
#ifdef COVMAP_V1
COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \
                   NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \
                   llvm::Type::getInt8PtrTy(Ctx)))
COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \
                   llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \
                   NameValue.size()))
#endif
/* COVMAP_V2 and COVMAP_V3: the name is stored as a 64-bit value initialized
 * from NameHash. */
#ifdef COVMAP_V2_OR_V3
COVMAP_FUNC_RECORD(const int64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
                   llvm::ConstantInt::get( \
                     llvm::Type::getInt64Ty(Ctx), NameHash))
#endif
COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \
                   llvm::ConstantInt::get( \
                     llvm::Type::getInt32Ty(Ctx), CoverageMapping.size()))
COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
                   llvm::ConstantInt::get( \
                     llvm::Type::getInt64Ty(Ctx), FuncHash))
/* COVMAP_V3 only: a FilenamesRef value and the coverage mapping bytes
 * themselves are part of the record. */
#ifdef COVMAP_V3
COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FilenamesRef, \
                   llvm::ConstantInt::get( \
                     llvm::Type::getInt64Ty(Ctx), FilenamesRef))
COVMAP_FUNC_RECORD(const char, \
                   llvm::ArrayType::get(llvm::Type::getInt8Ty(Ctx), \
                                        CoverageMapping.size()), \
                   CoverageMapping,
                   llvm::ConstantDataArray::getRaw( \
                     CoverageMapping, CoverageMapping.size(), \
                     llvm::Type::getInt8Ty(Ctx)))
#endif
#undef COVMAP_FUNC_RECORD
/* COVMAP_FUNC_RECORD end. */

/* COVMAP_HEADER start */
/* Definition of member fields of coverage map header.
+ */ +#ifndef COVMAP_HEADER +#define COVMAP_HEADER(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \ + llvm::ConstantInt::get(Int32Ty, NRecords)) +COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \ + llvm::ConstantInt::get(Int32Ty, FilenamesSize)) +COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \ + llvm::ConstantInt::get(Int32Ty, CoverageMappingSize)) +COVMAP_HEADER(uint32_t, Int32Ty, Version, \ + llvm::ConstantInt::get(Int32Ty, CovMapVersion::CurrentVersion)) +#undef COVMAP_HEADER +/* COVMAP_HEADER end. */ + + +#ifdef INSTR_PROF_SECT_ENTRY +#define INSTR_PROF_DATA_DEFINED +INSTR_PROF_SECT_ENTRY(IPSK_data, \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \ + INSTR_PROF_DATA_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ + INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_name, \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ + INSTR_PROF_NAME_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vals, \ + INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \ + INSTR_PROF_VALS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \ + INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \ + INSTR_PROF_VNODES_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_covmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \ + INSTR_PROF_COVMAP_COFF, "__LLVM_COV,") +INSTR_PROF_SECT_ENTRY(IPSK_covfun, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVFUN_COMMON), \ + INSTR_PROF_COVFUN_COFF, "__LLVM_COV,") +INSTR_PROF_SECT_ENTRY(IPSK_orderfile, \ + INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COFF), "__DATA,") + +#undef INSTR_PROF_SECT_ENTRY +#endif + + +#ifdef INSTR_PROF_VALUE_PROF_DATA +#define INSTR_PROF_DATA_DEFINED + +#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 +/*! + * This is the header of the data structure that defines the on-disk + * layout of the value profile data of a particular kind for one function. 
+ */ +typedef struct ValueProfRecord { + /* The kind of the value profile record. */ + uint32_t Kind; + /* + * The number of value profile sites. It is guaranteed to be non-zero; + * otherwise the record for this kind won't be emitted. + */ + uint32_t NumValueSites; + /* + * The first element of the array that stores the number of profiled + * values for each value site. The size of the array is NumValueSites. + * Since NumValueSites is greater than zero, there is at least one + * element in the array. + */ + uint8_t SiteCountArray[1]; + + /* + * The fake declaration is for documentation purpose only. + * Align the start of next field to be on 8 byte boundaries. + uint8_t Padding[X]; + */ + + /* The array of value profile data. The size of the array is the sum + * of all elements in SiteCountArray[]. + InstrProfValueData ValueData[]; + */ + +#ifdef __cplusplus + /*! + * Return the number of value sites. + */ + uint32_t getNumValueSites() const { return NumValueSites; } + /*! + * Read data from this record and save it to Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfSymtab *SymTab); + /* + * In-place byte swap: + * Do byte swap for this instance. \c Old is the original order before + * the swap, and \c New is the New byte order. + */ + void swapBytes(support::endianness Old, support::endianness New); +#endif +} ValueProfRecord; + +/*! + * Per-function header/control data structure for value profiling + * data in indexed format. + */ +typedef struct ValueProfData { + /* + * Total size in bytes including this field. It must be a multiple + * of sizeof(uint64_t). + */ + uint32_t TotalSize; + /* + *The number of value profile kinds that has value profile data. + * In this implementation, a value profile kind is considered to + * have profile data if the number of value profile sites for the + * kind is not zero. 
More aggressively, the implementation can + * choose to check the actual data value: if none of the value sites + * has any profiled values, the kind can be skipped. + */ + uint32_t NumValueKinds; + + /* + * Following are a sequence of variable length records. The prefix/header + * of each record is defined by ValueProfRecord type. The number of + * records is NumValueKinds. + * ValueProfRecord Record_1; + * ValueProfRecord Record_N; + */ + +#if __cplusplus + /*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ + static uint32_t getSize(const InstrProfRecord &Record); + /*! + * Return a pointer to \c ValueProfData instance ready to be streamed. + */ + static std::unique_ptr<ValueProfData> + serializeFrom(const InstrProfRecord &Record); + /*! + * Check the integrity of the record. + */ + Error checkIntegrity(); + /*! + * Return a pointer to \c ValueProfileData instance ready to be read. + * All data in the instance are properly byte swapped. The input + * data is assumed to be in little endian order. + */ + static Expected<std::unique_ptr<ValueProfData>> + getValueProfData(const unsigned char *SrcBuffer, + const unsigned char *const SrcBufferEnd, + support::endianness SrcDataEndianness); + /*! + * Swap byte order from \c Endianness order to host byte order. + */ + void swapBytesToHost(support::endianness Endianness); + /*! + * Swap byte order from host byte order to \c Endianness order. + */ + void swapBytesFromHost(support::endianness Endianness); + /*! + * Return the total size of \c ValueProfileData. + */ + uint32_t getSize() const { return TotalSize; } + /*! + * Read data from this data and save it to \c Record. 
+ */ + void deserializeTo(InstrProfRecord &Record, + InstrProfSymtab *SymTab); + void operator delete(void *ptr) { ::operator delete(ptr); } +#endif +} ValueProfData; + +/* + * The closure is designed to abstact away two types of value profile data: + * - InstrProfRecord which is the primary data structure used to + * represent profile data in host tools (reader, writer, and profile-use) + * - value profile runtime data structure suitable to be used by C + * runtime library. + * + * Both sources of data need to serialize to disk/memory-buffer in common + * format: ValueProfData. The abstraction allows compiler-rt's raw profiler + * writer to share the same format and code with indexed profile writer. + * + * For documentation of the member methods below, refer to corresponding methods + * in class InstrProfRecord. + */ +typedef struct ValueProfRecordClosure { + const void *Record; + uint32_t (*GetNumValueKinds)(const void *Record); + uint32_t (*GetNumValueSites)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); + + /* + * After extracting the value profile data from the value profile record, + * this method is used to map the in-memory value to on-disk value. If + * the method is null, value will be written out untranslated. 
+ */ + uint64_t (*RemapValueData)(uint32_t, uint64_t Value); + void (*GetValueForSite)(const void *R, InstrProfValueData *Dst, uint32_t K, + uint32_t S); + ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); +} ValueProfRecordClosure; + +INSTR_PROF_VISIBILITY ValueProfRecord * +getFirstValueProfRecord(ValueProfData *VPD); +INSTR_PROF_VISIBILITY ValueProfRecord * +getValueProfRecordNext(ValueProfRecord *VPR); +INSTR_PROF_VISIBILITY InstrProfValueData * +getValueProfRecordValueData(ValueProfRecord *VPR); +INSTR_PROF_VISIBILITY uint32_t +getValueProfRecordHeaderSize(uint32_t NumValueSites); + +#undef INSTR_PROF_VALUE_PROF_DATA +#endif /* INSTR_PROF_VALUE_PROF_DATA */ + + +#ifdef INSTR_PROF_COMMON_API_IMPL +#define INSTR_PROF_DATA_DEFINED +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#define INSTR_PROF_NULLPTR nullptr +#else +#define INSTR_PROF_INLINE +#define INSTR_PROF_NULLPTR NULL +#endif + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +/*! + * Return the \c ValueProfRecord header size including the + * padding bytes. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) { + uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) + + sizeof(uint8_t) * NumValueSites; + /* Round the size to multiple of 8 bytes. */ + Size = (Size + 7) & ~7; + return Size; +} + +/*! + * Return the total size of the value profile record including the + * header and the value data. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordSize(uint32_t NumValueSites, + uint32_t NumValueData) { + return getValueProfRecordHeaderSize(NumValueSites) + + sizeof(InstrProfValueData) * NumValueData; +} + +/*! + * Return the pointer to the start of value data array. 
+ */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) { + return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize( + This->NumValueSites)); +} + +/*! + * Return the total number of value data for \c This record. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) { + uint32_t NumValueData = 0; + uint32_t I; + for (I = 0; I < This->NumValueSites; I++) + NumValueData += This->SiteCountArray[I]; + return NumValueData; +} + +/*! + * Use this method to advance to the next \c This \c ValueProfRecord. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) { + uint32_t NumValueData = getValueProfRecordNumValueData(This); + return (ValueProfRecord *)((char *)This + + getValueProfRecordSize(This->NumValueSites, + NumValueData)); +} + +/*! + * Return the first \c ValueProfRecord instance. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { + return (ValueProfRecord *)((char *)This + sizeof(ValueProfData)); +} + +/* Closure based interfaces. */ + +/*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ +INSTR_PROF_VISIBILITY uint32_t +getValueProfDataSize(ValueProfRecordClosure *Closure) { + uint32_t Kind; + uint32_t TotalSize = sizeof(ValueProfData); + const void *Record = Closure->Record; + + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind); + if (!NumValueSites) + continue; + TotalSize += getValueProfRecordSize(NumValueSites, + Closure->GetNumValueData(Record, Kind)); + } + return TotalSize; +} + +/*! + * Extract value profile data of a function for the profile kind \c ValueKind + * from the \c Closure and serialize the data into \c This record instance. 
+ */ +INSTR_PROF_VISIBILITY void +serializeValueProfRecordFrom(ValueProfRecord *This, + ValueProfRecordClosure *Closure, + uint32_t ValueKind, uint32_t NumValueSites) { + uint32_t S; + const void *Record = Closure->Record; + This->Kind = ValueKind; + This->NumValueSites = NumValueSites; + InstrProfValueData *DstVD = getValueProfRecordValueData(This); + + for (S = 0; S < NumValueSites; S++) { + uint32_t ND = Closure->GetNumValueDataForSite(Record, ValueKind, S); + This->SiteCountArray[S] = ND; + Closure->GetValueForSite(Record, DstVD, ValueKind, S); + DstVD += ND; + } +} + +/*! + * Extract value profile data of a function from the \c Closure + * and serialize the data into \c DstData if it is not NULL or heap + * memory allocated by the \c Closure's allocator method. If \c + * DstData is not null, the caller is expected to set the TotalSize + * in DstData. + */ +INSTR_PROF_VISIBILITY ValueProfData * +serializeValueProfDataFrom(ValueProfRecordClosure *Closure, + ValueProfData *DstData) { + uint32_t Kind; + uint32_t TotalSize = + DstData ? DstData->TotalSize : getValueProfDataSize(Closure); + + ValueProfData *VPD = + DstData ? DstData : Closure->AllocValueProfData(TotalSize); + + VPD->TotalSize = TotalSize; + VPD->NumValueKinds = Closure->GetNumValueKinds(Closure->Record); + ValueProfRecord *VR = getFirstValueProfRecord(VPD); + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Closure->Record, Kind); + if (!NumValueSites) + continue; + serializeValueProfRecordFrom(VR, Closure, Kind, NumValueSites); + VR = getValueProfRecordNext(VR); + } + return VPD; +} + +#undef INSTR_PROF_COMMON_API_IMPL +#endif /* INSTR_PROF_COMMON_API_IMPL */ + +/*============================================================================*/ + +#ifndef INSTR_PROF_DATA_DEFINED + +#ifndef INSTR_PROF_DATA_INC +#define INSTR_PROF_DATA_INC + +/* Helper macros. 
/* Magic number to detect file format and endianness.
 * Use 255 at one end, since no UTF-8 file can use that character.  Avoid 0,
 * so that utilities, like strings, don't grab it as a string.  129 is also
 * invalid UTF-8, and high enough to be interesting.
 * Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
 * for 32-bit platforms.
 *
 * Each replacement list is fully parenthesized so the macro expands to a
 * single operand: `==`, `&` and `+` all bind tighter than `|`, so without the
 * outer parentheses an expression such as `X == INSTR_PROF_RAW_MAGIC_64`
 * would compare X against the first term only.
 */
#define INSTR_PROF_RAW_MAGIC_64 \
  ((uint64_t)255 << 56 | (uint64_t)'l' << 48 | (uint64_t)'p' << 40 | \
   (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | \
   (uint64_t)'r' << 8 | (uint64_t)129)
#define INSTR_PROF_RAW_MAGIC_32 \
  ((uint64_t)255 << 56 | (uint64_t)'l' << 48 | (uint64_t)'p' << 40 | \
   (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | \
   (uint64_t)'R' << 8 | (uint64_t)129)
/* Mask covering the upper 8 bits of the version word, which are reserved for
 * profile variant flags. */
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
/* Strip the variant flags from V, leaving only the format version number. */
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
/* Bit 56: set for an IR-level instrumentation generated profile, clear for a
 * Clang FE generated profile. */
#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
/* Bit 57: the profile contains context-sensitive records. */
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
/* Bit 58: the flag tested by the profile readers' instrEntryBBEnabled()
 * queries. */
#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
/* Bit 59: debug info is used to correlate profiles. */
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
/* Bit 60: single byte coverage instrumentation. */
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
/* Bit 61: function entry instrumentation only. */
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COFF +/* Value profile nodes section. */ +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF +#define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_COVFUN_COFF +#define INSTR_PROF_ORDERFILE_SECT_NAME INSTR_PROF_ORDERFILE_COFF +#else +/* Runtime section names and name strings. */ +#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) +#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) +#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +/* Array of pointers. Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON) +/* Value profile nodes section. */ +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON) +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON) +#define INSTR_PROF_COVFUN_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVFUN_COMMON) +/* Order file instrumentation. */ +#define INSTR_PROF_ORDERFILE_SECT_NAME \ + INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON) +#endif + +#define INSTR_PROF_ORDERFILE_BUFFER_NAME _llvm_order_file_buffer +#define INSTR_PROF_ORDERFILE_BUFFER_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_NAME) +#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME _llvm_order_file_buffer_idx +#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME) + +/* Macros to define start/stop section symbol for a given + * section on Linux. 
/* Macros to define start/stop section symbol for a given
 * section on Linux. The argument is token-pasted onto the prefix, so it must
 * be an unquoted section identifier (one of the INSTR_PROF_*_COMMON names),
 * not one of the quoted INSTR_PROF_*_SECT_NAME strings. For instance
 * INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON) will
 * expand to __start___llvm_prf_data
 */
#define INSTR_PROF_SECT_START(Sect) \
        INSTR_PROF_CONCAT(__start_,Sect)
#define INSTR_PROF_SECT_STOP(Sect) \
        INSTR_PROF_CONCAT(__stop_,Sect)
#ifdef __cplusplus
#define INSTR_PROF_INLINE inline
#else
#define INSTR_PROF_INLINE
#endif

/* Fall back to no visibility annotation when the includer did not define one,
 * so the helpers below still compile on their own. */
#ifndef INSTR_PROF_VISIBILITY
#define INSTR_PROF_VISIBILITY
#endif

/* The value range buckets (22 buckets) for the memop size value profiling looks
 * like:
 *
 *   [0, 0]
 *   [1, 1]
 *   [2, 2]
 *   [3, 3]
 *   [4, 4]
 *   [5, 5]
 *   [6, 6]
 *   [7, 7]
 *   [8, 8]
 *   [9, 15]
 *   [16, 16]
 *   [17, 31]
 *   [32, 32]
 *   [33, 63]
 *   [64, 64]
 *   [65, 127]
 *   [128, 128]
 *   [129, 255]
 *   [256, 256]
 *   [257, 511]
 *   [512, 512]
 *   [513, UINT64_MAX]
 *
 * Each range has a 'representative value' which is the lower end value of the
 * range and used to store in the runtime profile data records and the VP
 * metadata. For example, it's 2 for [2, 2] and 65 for [65, 127].
 */
#define INSTR_PROF_NUM_BUCKETS 22

/*
 * Clz and Popcount. This code was copied from
 * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and
 * llvm/include/llvm/Support/MathExtras.h.
 *
 * Both variants of InstProfClzll return 64 for an input of 0.
 */
#if defined(_MSC_VER) && !defined(__clang__)

#include <intrin.h>
INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
int InstProfClzll(unsigned long long X) {
  unsigned long LeadZeroIdx = 0;
#if !defined(_M_ARM64) && !defined(_M_X64)
  // Scan the high 32 bits.
  if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32)))
    return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset
                                           // from the MSB.
  // Scan the low 32 bits.
  if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X)))
    return (int)(63 - LeadZeroIdx);
#else
  if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx;
#endif
  return 64;
}
INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
int InstProfPopcountll(unsigned long long X) {
  // This code originates from https://reviews.llvm.org/rG30626254510f.
  unsigned long long v = X;
  v = v - ((v >> 1) & 0x5555555555555555ULL);
  v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56);
}

#else

INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
int InstProfClzll(unsigned long long X) {
  /* The result of __builtin_clzll(0) is undefined; report 64 for zero so this
   * path agrees with the MSVC implementation above. */
  return X ? __builtin_clzll(X) : 64;
}
INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); }

#endif  /* defined(_MSC_VER) && !defined(__clang__) */

/* Map an (observed) memop size value to the representative value of its range.
 * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */
INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t
InstrProfGetRangeRepValue(uint64_t Value) {
  /* The first ranges are individually tracked. Use the value as is. */
  if (Value <= 8)
    return Value;
  /* The last range is mapped to its lowest value. */
  if (Value >= 513)
    return 513;
  /* A power of two is the only member of its range. */
  if (InstProfPopcountll(Value) == 1)
    return Value;
  /* Otherwise, take the previous power of two + 1, i.e. the lower end of the
   * range between two consecutive powers of two. */
  return (UINT64_C(1) << (64 - InstProfClzll(Value) - 1)) + 1;
}

/* Return true if the range that an (observed) memop size value belongs to has
 * only a single value in the range.  For example, 0 -> true, 8 -> true, 10 ->
 * false, 64 -> true, 100 -> false, 513 -> false, 1024 -> false. */
INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned
InstrProfIsSingleValRange(uint64_t Value) {
  /* The first ranges are individually tracked. */
  if (Value <= 8)
    return 1;
  /* Everything from 513 up shares the last, open-ended range, including
   * larger powers of two such as 1024. */
  if (Value >= 513)
    return 0;
  /* Below 513, a range holds a single value exactly when it is a power of
   * two. */
  return InstProfPopcountll(Value) == 1 ? 1 : 0;
}
+class InstrProfIterator { +public: + using iterator_category = std::input_iterator_tag; + using value_type = NamedInstrProfRecord; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + +private: + InstrProfReader *Reader = nullptr; + value_type Record; + + void Increment(); + +public: + InstrProfIterator() = default; + InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } + + InstrProfIterator &operator++() { Increment(); return *this; } + bool operator==(const InstrProfIterator &RHS) const { + return Reader == RHS.Reader; + } + bool operator!=(const InstrProfIterator &RHS) const { + return Reader != RHS.Reader; + } + value_type &operator*() { return Record; } + value_type *operator->() { return &Record; } +}; + +/// Base class and interface for reading profiling data of any known instrprof +/// format. Provides an iterator over NamedInstrProfRecords. +class InstrProfReader { + instrprof_error LastError = instrprof_error::success; + std::string LastErrorMsg; + +public: + InstrProfReader() = default; + virtual ~InstrProfReader() = default; + + /// Read the header. Required before reading first record. + virtual Error readHeader() = 0; + + /// Read a single record. + virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; + + /// Print binary ids on stream OS. + virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; + + /// Iterator over profile data. + InstrProfIterator begin() { return InstrProfIterator(this); } + InstrProfIterator end() { return InstrProfIterator(); } + + virtual bool isIRLevelProfile() const = 0; + + virtual bool hasCSIRLevelProfile() const = 0; + + virtual bool instrEntryBBEnabled() const = 0; + + /// Return true if we must provide debug info to create PGO profiles. + virtual bool useDebugInfoCorrelate() const { return false; } + + /// Return true if the profile has single byte counters representing coverage. 
+ virtual bool hasSingleByteCoverage() const = 0; + + /// Return true if the profile only instruments function entries. + virtual bool functionEntryOnly() const = 0; + + /// Returns a BitsetEnum describing the attributes of the profile. To check + /// individual attributes prefer using the helpers above. + virtual InstrProfKind getProfileKind() const = 0; + + /// Return the PGO symtab. There are three different readers: + /// Raw, Text, and Indexed profile readers. The first two types + /// of readers are used only by llvm-profdata tool, while the indexed + /// profile reader is also used by llvm-cov tool and the compiler ( + /// backend or frontend). Since creating PGO symtab can create + /// significant runtime and memory overhead (as it touches data + /// for the whole program), InstrProfSymtab for the indexed profile + /// reader should be created on demand and it is recommended to be + /// only used for dumping purpose with llvm-proftool, not with the + /// compiler. + virtual InstrProfSymtab &getSymtab() = 0; + + /// Compute the sum of counts and return in Sum. + void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); + +protected: + std::unique_ptr<InstrProfSymtab> Symtab; + + /// Set the current error and return same. + Error error(instrprof_error Err, const std::string &ErrMsg = "") { + LastError = Err; + LastErrorMsg = ErrMsg; + if (Err == instrprof_error::success) + return Error::success(); + return make_error<InstrProfError>(Err, ErrMsg); + } + + Error error(Error &&E) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + LastError = IPE.get(); + LastErrorMsg = IPE.getMessage(); + }); + return make_error<InstrProfError>(LastError, LastErrorMsg); + } + + /// Clear the current error and return a successful one. + Error success() { return error(instrprof_error::success); } + +public: + /// Return true if the reader has finished reading the profile data. 
+ bool isEOF() { return LastError == instrprof_error::eof; } + + /// Return true if the reader encountered an error reading profiling data. + bool hasError() { return LastError != instrprof_error::success && !isEOF(); } + + /// Get the current error. + Error getError() { + if (hasError()) + return make_error<InstrProfError>(LastError, LastErrorMsg); + return Error::success(); + } + + /// Factory method to create an appropriately typed reader for the given + /// instrprof file. + static Expected<std::unique_ptr<InstrProfReader>> + create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr); + + static Expected<std::unique_ptr<InstrProfReader>> + create(std::unique_ptr<MemoryBuffer> Buffer, + const InstrProfCorrelator *Correlator = nullptr); +}; + +/// Reader for the simple text based instrprof format. +/// +/// This format is a simple text format that's suitable for test data. Records +/// are separated by one or more blank lines, and record fields are separated by +/// new lines. +/// +/// Each record consists of a function name, a function hash, a number of +/// counters, and then each counter value, in that order. +class TextInstrProfReader : public InstrProfReader { +private: + /// The profile data file contents. + std::unique_ptr<MemoryBuffer> DataBuffer; + /// Iterator over the profile data. + line_iterator Line; + /// The attributes of the current profile. + InstrProfKind ProfileKind = InstrProfKind::Unknown; + + Error readValueProfileData(InstrProfRecord &Record); + +public: + TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) + : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} + TextInstrProfReader(const TextInstrProfReader &) = delete; + TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; + + /// Return true if the given buffer is in text instrprof format. 
+ static bool hasFormat(const MemoryBuffer &Buffer); + + bool isIRLevelProfile() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::IR); + } + + bool hasCSIRLevelProfile() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::CS); + } + + bool instrEntryBBEnabled() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::BB); + } + + bool hasSingleByteCoverage() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage); + } + + bool functionEntryOnly() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly); + } + + InstrProfKind getProfileKind() const override { return ProfileKind; } + + /// Read the header. + Error readHeader() override; + + /// Read a single record. + Error readNextRecord(NamedInstrProfRecord &Record) override; + + InstrProfSymtab &getSymtab() override { + assert(Symtab.get()); + return *Symtab.get(); + } +}; + +/// Reader for the raw instrprof binary format from runtime. +/// +/// This format is a raw memory dump of the instrumentation-based profiling data +/// from the runtime. It has no index. +/// +/// Templated on the unsigned type whose size matches pointers on the platform +/// that wrote the profile. +template <class IntPtrT> +class RawInstrProfReader : public InstrProfReader { +private: + /// The profile data file contents. + std::unique_ptr<MemoryBuffer> DataBuffer; + /// If available, this hold the ProfileData array used to correlate raw + /// instrumentation data to their functions. + const InstrProfCorrelatorImpl<IntPtrT> *Correlator; + bool ShouldSwapBytes; + // The value of the version field of the raw profile data header. The lower 56 + // bits specifies the format version and the most significant 8 bits specify + // the variant types of the profile. 
+ uint64_t Version; + uint64_t CountersDelta; + uint64_t NamesDelta; + const RawInstrProf::ProfileData<IntPtrT> *Data; + const RawInstrProf::ProfileData<IntPtrT> *DataEnd; + const char *CountersStart; + const char *CountersEnd; + const char *NamesStart; + const char *NamesEnd; + // After value profile is all read, this pointer points to + // the header of next profile data (if exists) + const uint8_t *ValueDataStart; + uint32_t ValueKindLast; + uint32_t CurValueDataSize; + + uint64_t BinaryIdsSize; + const uint8_t *BinaryIdsStart; + +public: + RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer, + const InstrProfCorrelator *Correlator) + : DataBuffer(std::move(DataBuffer)), + Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>( + Correlator)) {} + RawInstrProfReader(const RawInstrProfReader &) = delete; + RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; + + static bool hasFormat(const MemoryBuffer &DataBuffer); + Error readHeader() override; + Error readNextRecord(NamedInstrProfRecord &Record) override; + Error printBinaryIds(raw_ostream &OS) override; + + bool isIRLevelProfile() const override { + return (Version & VARIANT_MASK_IR_PROF) != 0; + } + + bool hasCSIRLevelProfile() const override { + return (Version & VARIANT_MASK_CSIR_PROF) != 0; + } + + bool instrEntryBBEnabled() const override { + return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; + } + + bool useDebugInfoCorrelate() const override { + return (Version & VARIANT_MASK_DBG_CORRELATE) != 0; + } + + bool hasSingleByteCoverage() const override { + return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0; + } + + bool functionEntryOnly() const override { + return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0; + } + + /// Returns a BitsetEnum describing the attributes of the raw instr profile. 
/// Return the number of padding bytes (0 to 7) that bring \p SizeInBytes up
/// to the next multiple of sizeof(uint64_t). A size that is already a
/// multiple needs no padding.
inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
  const uint64_t Remainder = SizeInBytes % sizeof(uint64_t);
  if (Remainder == 0)
    return 0;
  return static_cast<uint8_t>(sizeof(uint64_t) - Remainder);
}
sizeof(uint8_t) : sizeof(uint64_t); + } +}; + +using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; +using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; + +namespace IndexedInstrProf { + +enum class HashT : uint32_t; + +} // end namespace IndexedInstrProf + +/// Trait for lookups into the on-disk hash table for the binary instrprof +/// format. +class InstrProfLookupTrait { + std::vector<NamedInstrProfRecord> DataBuffer; + IndexedInstrProf::HashT HashType; + unsigned FormatVersion; + // Endianness of the input value profile data. + // It should be LE by default, but can be changed + // for testing purpose. + support::endianness ValueProfDataEndianness = support::little; + +public: + InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) + : HashType(HashType), FormatVersion(FormatVersion) {} + + using data_type = ArrayRef<NamedInstrProfRecord>; + + using internal_key_type = StringRef; + using external_key_type = StringRef; + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + static bool EqualKey(StringRef A, StringRef B) { return A == B; } + static StringRef GetInternalKey(StringRef K) { return K; } + static StringRef GetExternalKey(StringRef K) { return K; } + + hash_value_type ComputeHash(StringRef K); + + static std::pair<offset_type, offset_type> + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); + offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); + return std::make_pair(KeyLen, DataLen); + } + + StringRef ReadKey(const unsigned char *D, offset_type N) { + return StringRef((const char *)D, N); + } + + bool readValueProfilingData(const unsigned char *&D, + const unsigned char *const End); + data_type ReadData(StringRef K, const unsigned char *D, offset_type N); + + // Used for testing purpose only. 
+ void setValueProfDataEndianness(support::endianness Endianness) { + ValueProfDataEndianness = Endianness; + } +}; + +struct InstrProfReaderIndexBase { + virtual ~InstrProfReaderIndexBase() = default; + + // Read all the profile records with the same key pointed to the current + // iterator. + virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; + + // Read all the profile records with the key equal to FuncName + virtual Error getRecords(StringRef FuncName, + ArrayRef<NamedInstrProfRecord> &Data) = 0; + virtual void advanceToNextKey() = 0; + virtual bool atEnd() const = 0; + virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; + virtual uint64_t getVersion() const = 0; + virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; + virtual bool instrEntryBBEnabled() const = 0; + virtual bool hasSingleByteCoverage() const = 0; + virtual bool functionEntryOnly() const = 0; + virtual InstrProfKind getProfileKind() const = 0; + virtual Error populateSymtab(InstrProfSymtab &) = 0; +}; + +using OnDiskHashTableImplV3 = + OnDiskIterableChainedHashTable<InstrProfLookupTrait>; + +template <typename HashTableImpl> +class InstrProfReaderItaniumRemapper; + +template <typename HashTableImpl> +class InstrProfReaderIndex : public InstrProfReaderIndexBase { +private: + std::unique_ptr<HashTableImpl> HashTable; + typename HashTableImpl::data_iterator RecordIterator; + uint64_t FormatVersion; + + friend class InstrProfReaderItaniumRemapper<HashTableImpl>; + +public: + InstrProfReaderIndex(const unsigned char *Buckets, + const unsigned char *const Payload, + const unsigned char *const Base, + IndexedInstrProf::HashT HashType, uint64_t Version); + ~InstrProfReaderIndex() override = default; + + Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; + Error getRecords(StringRef FuncName, + ArrayRef<NamedInstrProfRecord> &Data) override; + void advanceToNextKey() override { RecordIterator++; } + + bool 
/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
///
/// Abstract interface: subclasses must implement getRecords() and may override
/// populateRemappings().
class InstrProfReaderRemapper {
public:
  virtual ~InstrProfReaderRemapper() = default;
  /// Hook for setting up the remappings. The default implementation does
  /// nothing and returns success.
  virtual Error populateRemappings() { return Error::success(); }
  /// Look up the profile records for \p FuncName and expose them through
  /// \p Data.
  /// NOTE(review): presumably returns an error when no record matches —
  /// confirm against the implementations.
  virtual Error getRecords(StringRef FuncName,
                           ArrayRef<NamedInstrProfRecord> &Data) = 0;
};
+ std::unique_ptr<ProfileSummary> CS_Summary; + // Index to the current record in the record array. + unsigned RecordIndex; + + // Read the profile summary. Return a pointer pointing to one byte past the + // end of the summary data if it exists or the input \c Cur. + // \c UseCS indicates whether to use the context-sensitive profile summary. + const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur, bool UseCS); + +public: + IndexedInstrProfReader( + std::unique_ptr<MemoryBuffer> DataBuffer, + std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) + : DataBuffer(std::move(DataBuffer)), + RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} + IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; + IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + + /// Return the profile version. + uint64_t getVersion() const { return Index->getVersion(); } + bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } + bool hasCSIRLevelProfile() const override { + return Index->hasCSIRLevelProfile(); + } + + bool instrEntryBBEnabled() const override { + return Index->instrEntryBBEnabled(); + } + + bool hasSingleByteCoverage() const override { + return Index->hasSingleByteCoverage(); + } + + bool functionEntryOnly() const override { return Index->functionEntryOnly(); } + + /// Returns a BitsetEnum describing the attributes of the indexed instr + /// profile. + InstrProfKind getProfileKind() const override { + return Index->getProfileKind(); + } + + /// Return true if the given buffer is in an indexed instrprof format. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + /// Read the file header. + Error readHeader() override; + /// Read a single record. 
+ Error readNextRecord(NamedInstrProfRecord &Record) override; + + /// Return the NamedInstrProfRecord associated with FuncName and FuncHash + Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash); + + /// Fill Counts with the profile data for the given function name. + Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, + std::vector<uint64_t> &Counts); + + /// Return the maximum of all known function counts. + /// \c UseCS indicates whether to use the context-sensitive count. + uint64_t getMaximumFunctionCount(bool UseCS) { + if (UseCS) { + assert(CS_Summary && "No context sensitive profile summary"); + return CS_Summary->getMaxFunctionCount(); + } else { + assert(Summary && "No profile summary"); + return Summary->getMaxFunctionCount(); + } + } + + /// Factory method to create an indexed reader. + static Expected<std::unique_ptr<IndexedInstrProfReader>> + create(const Twine &Path, const Twine &RemappingPath = ""); + + static Expected<std::unique_ptr<IndexedInstrProfReader>> + create(std::unique_ptr<MemoryBuffer> Buffer, + std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + Index->setValueProfDataEndianness(Endianness); + } + + // See description in the base class. This interface is designed + // to be used by llvm-profdata (for dumping). Avoid using this when + // the client is the compiler. + InstrProfSymtab &getSymtab() override; + + /// Return the profile summary. + /// \c UseCS indicates whether to use the context-sensitive summary. 
+ ProfileSummary &getSummary(bool UseCS) { + if (UseCS) { + assert(CS_Summary && "No context sensitive summary"); + return *(CS_Summary.get()); + } else { + assert(Summary && "No profile summary"); + return *(Summary.get()); + } + } +}; + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfWriter.h b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfWriter.h new file mode 100644 index 0000000000..ac6c756e75 --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/InstrProfWriter.h @@ -0,0 +1,145 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- InstrProfWriter.h - Instrumented profiling writer --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing profiling data for instrumentation +// based PGO and coverage. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H +#define LLVM_PROFILEDATA_INSTRPROFWRITER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstdint> +#include <memory> + +namespace llvm { + +/// Writer for instrumentation based profile data. 
+class InstrProfRecordWriterTrait; +class ProfOStream; +class raw_fd_ostream; + +class InstrProfWriter { +public: + using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>; + +private: + bool Sparse; + StringMap<ProfilingData> FunctionData; + // An enum describing the attributes of the profile. + InstrProfKind ProfileKind = InstrProfKind::Unknown; + // Use raw pointer here for the incomplete type object. + InstrProfRecordWriterTrait *InfoObj; + +public: + InstrProfWriter(bool Sparse = false); + ~InstrProfWriter(); + + StringMap<ProfilingData> &getProfileData() { return FunctionData; } + + /// Add function counts for the given function. If there are already counts + /// for this function and the hash and number of counts match, each counter is + /// summed. Optionally scale counts by \p Weight. + void addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref<void(Error)> Warn); + void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) { + addRecord(std::move(I), 1, Warn); + } + + /// Merge existing function counts from the given writer. + void mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref<void(Error)> Warn); + + /// Write the profile to \c OS + Error write(raw_fd_ostream &OS); + + /// Write the profile in text format to \c OS + Error writeText(raw_fd_ostream &OS); + + Error validateRecord(const InstrProfRecord &Func); + + /// Write \c Record in text format to \c OS + static void writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Counters, + InstrProfSymtab &Symtab, raw_fd_ostream &OS); + + /// Write the profile, returning the raw data. For testing. + std::unique_ptr<MemoryBuffer> writeBuffer(); + + /// Update the attributes of the current profile from the attributes + /// specified. An error is returned if IR and FE profiles are mixed. + Error mergeProfileKind(const InstrProfKind Other) { + // If the kind is unset, this is the first profile we are merging so just + // set it to the given type. 
+ if (ProfileKind == InstrProfKind::Unknown) { + ProfileKind = Other; + return Error::success(); + } + + // Returns true if merging is should fail assuming A and B are incompatible. + auto testIncompatible = [&](InstrProfKind A, InstrProfKind B) { + return (static_cast<bool>(ProfileKind & A) && + static_cast<bool>(Other & B)) || + (static_cast<bool>(ProfileKind & B) && + static_cast<bool>(Other & A)); + }; + + // Check if the profiles are in-compatible. Clang frontend profiles can't be + // merged with other profile types. + if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^ + (Other & InstrProfKind::FE))) { + return make_error<InstrProfError>(instrprof_error::unsupported_version); + } + if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) { + return make_error<InstrProfError>( + instrprof_error::unsupported_version, + "cannot merge FunctionEntryOnly profiles and BB profiles together"); + } + + // Now we update the profile type with the bits that are set. + ProfileKind |= Other; + return Error::success(); + } + + // Internal interface for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness); + void setOutputSparse(bool Sparse); + // Compute the overlap b/w this object and Other. Program level result is + // stored in Overlap and function level result is stored in FuncLevelOverlap. 
+ void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap, + OverlapStats &FuncLevelOverlap, + const OverlapFuncFilters &FuncFilter); + +private: + void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I, + uint64_t Weight, function_ref<void(Error)> Warn); + bool shouldEncodeData(const ProfilingData &PD); + + Error writeImpl(ProfOStream &OS); +}; + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROFWRITER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/MemProfData.inc b/contrib/libs/llvm14/include/llvm/ProfileData/MemProfData.inc new file mode 100644 index 0000000000..ff22a69796 --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/MemProfData.inc @@ -0,0 +1,152 @@ +#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC +#define LLVM_PROFILEDATA_MEMPROFDATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + +#ifdef _MSC_VER +#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop)) +#else +#define PACKED(...) 
__VA_ARGS__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. +#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. +PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + // This field is unused until sanitizer procmaps support for build ids for + // Linux-Elf is implemented. + uint8_t BuildId[32] = {0}; + + SegmentEntry(uint64_t S, uint64_t E, uint64_t O) : + Start(S), End(E), Offset(O) {} + + SegmentEntry(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + } + + SegmentEntry& operator=(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + return *this; + } + + bool operator==(const SegmentEntry& S) const { + return Start == S.Start && + End == S.End && + Offset == S.Offset; + } +}); + +// A struct representing the heap allocation characteristics of a particular +// runtime context. This struct is shared between the compiler-rt runtime and +// the raw profile reader. The indexed format uses a separate, self-describing +// backwards compatible format. 
PACKED(struct MemInfoBlock {
  // Number of allocations folded into this block.
  uint32_t alloc_count;
  // Access-count statistics across the folded allocations.
  uint64_t total_access_count, min_access_count, max_access_count;
  // Size statistics across the folded allocations.
  uint64_t total_size;
  uint32_t min_size, max_size;
  // Timestamps of the most recently merged allocation.
  uint32_t alloc_timestamp, dealloc_timestamp;
  // Lifetime (dealloc_timestamp - alloc_timestamp) statistics.
  uint64_t total_lifetime;
  uint32_t min_lifetime, max_lifetime;
  // CPU ids of the most recently merged allocation.
  uint32_t alloc_cpu_id, dealloc_cpu_id;
  uint32_t num_migrated_cpu;

  // Only compared to prior deallocated object currently.
  uint32_t num_lifetime_overlaps;
  uint32_t num_same_alloc_cpu;
  uint32_t num_same_dealloc_cpu;

  uint64_t data_type_id; // TODO: hash of type name

  // NOTE(review): only alloc_count is initialized here; every other field is
  // left uninitialized, so a default-constructed block must not be read or
  // merged into before it is assigned.
  MemInfoBlock() : alloc_count(0) {}

  // Build the block for a single allocation: all min/max/total fields start
  // from that one allocation's values and the comparison counters start at 0.
  // NOTE(review): data_type_id is not initialized by this constructor either.
  MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp,
               uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu)
      : alloc_count(1), total_access_count(access_count),
        min_access_count(access_count), max_access_count(access_count),
        total_size(size), min_size(size), max_size(size),
        alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
        total_lifetime(dealloc_timestamp - alloc_timestamp),
        min_lifetime(total_lifetime), max_lifetime(total_lifetime),
        alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
        num_lifetime_overlaps(0), num_same_alloc_cpu(0),
        num_same_dealloc_cpu(0) {
    num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
  }

  // Fold newMIB into this block: counts and totals are summed, min_* keep the
  // smaller value, max_* keep the larger value, and the timestamps and cpu ids
  // are replaced by those of newMIB after the overlap / same-cpu counters have
  // been updated against the previous values.
  void Merge(const MemInfoBlock &newMIB) {
    alloc_count += newMIB.alloc_count;

    total_access_count += newMIB.total_access_count;
    min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count;
    // A maximum keeps the larger of the two values.
    max_access_count = newMIB.max_access_count > max_access_count ? newMIB.max_access_count : max_access_count;

    total_size += newMIB.total_size;
    min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size;
    // A maximum keeps the larger of the two values.
    max_size = newMIB.max_size > max_size ? newMIB.max_size : max_size;

    total_lifetime += newMIB.total_lifetime;
    min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime;
    max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime;

    // We know newMIB was deallocated later, so just need to check if it was
    // allocated before last one deallocated.
    num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
    alloc_timestamp = newMIB.alloc_timestamp;
    dealloc_timestamp = newMIB.dealloc_timestamp;

    // Compare against the previous cpu ids before overwriting them.
    num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
    num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
    alloc_cpu_id = newMIB.alloc_cpu_id;
    dealloc_cpu_id = newMIB.dealloc_cpu_id;
  }
});
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_PROFILECOMMON_H +#define LLVM_PROFILEDATA_PROFILECOMMON_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> +#include <functional> +#include <map> +#include <memory> +#include <vector> + +namespace llvm { + +namespace sampleprof { + +class FunctionSamples; + +} // end namespace sampleprof + +inline const char *getHotSectionPrefix() { return "hot"; } +inline const char *getUnlikelySectionPrefix() { return "unlikely"; } + +class ProfileSummaryBuilder { +private: + /// We keep track of the number of times a count (block count or samples) + /// appears in the profile. The map is kept sorted in the descending order of + /// counts. + std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies; + std::vector<uint32_t> DetailedSummaryCutoffs; + +protected: + SummaryEntryVector DetailedSummary; + uint64_t TotalCount = 0; + uint64_t MaxCount = 0; + uint64_t MaxFunctionCount = 0; + uint32_t NumCounts = 0; + uint32_t NumFunctions = 0; + + ProfileSummaryBuilder(std::vector<uint32_t> Cutoffs) + : DetailedSummaryCutoffs(std::move(Cutoffs)) {} + ~ProfileSummaryBuilder() = default; + + inline void addCount(uint64_t Count); + void computeDetailedSummary(); + +public: + /// A vector of useful cutoff values for detailed summary. + static const ArrayRef<uint32_t> DefaultCutoffs; + + /// Find the summary entry for a desired percentile of counts. 
+ static const ProfileSummaryEntry & + getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile); + static uint64_t getHotCountThreshold(const SummaryEntryVector &DS); + static uint64_t getColdCountThreshold(const SummaryEntryVector &DS); +}; + +class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { + uint64_t MaxInternalBlockCount = 0; + + inline void addEntryCount(uint64_t Count); + inline void addInternalCount(uint64_t Count); + +public: + InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs) + : ProfileSummaryBuilder(std::move(Cutoffs)) {} + + void addRecord(const InstrProfRecord &); + std::unique_ptr<ProfileSummary> getSummary(); +}; + +class SampleProfileSummaryBuilder final : public ProfileSummaryBuilder { +public: + SampleProfileSummaryBuilder(std::vector<uint32_t> Cutoffs) + : ProfileSummaryBuilder(std::move(Cutoffs)) {} + + void addRecord(const sampleprof::FunctionSamples &FS, + bool isCallsiteSample = false); + std::unique_ptr<ProfileSummary> + computeSummaryForProfiles(const sampleprof::SampleProfileMap &Profiles); + std::unique_ptr<ProfileSummary> getSummary(); +}; + +/// This is called when a count is seen in the profile. 
+void ProfileSummaryBuilder::addCount(uint64_t Count) { + TotalCount += Count; + if (Count > MaxCount) + MaxCount = Count; + NumCounts++; + CountFrequencies[Count]++; +} + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_PROFILECOMMON_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/RawMemProfReader.h b/contrib/libs/llvm14/include/llvm/ProfileData/RawMemProfReader.h new file mode 100644 index 0000000000..50e22d9c32 --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/RawMemProfReader.h @@ -0,0 +1,54 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { +namespace memprof { + +class RawMemProfReader { +public: + RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + // Prints aggregate counts for each raw profile parsed from the DataBuffer. + void printSummaries(raw_ostream &OS) const; + + // Return true if the \p DataBuffer starts with magic bytes indicating it is + // a raw binary memprof profile. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + // Create a RawMemProfReader after sanity checking the contents of the file at + // \p Path. 
+ static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path); + +private: + std::unique_ptr<MemoryBuffer> DataBuffer; +}; + +} // namespace memprof +} // namespace llvm + +#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/SampleProf.h b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProf.h new file mode 100644 index 0000000000..614dd3981a --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProf.h @@ -0,0 +1,1290 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains common definitions used in the reading and writing of +// sample profile data. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H +#define LLVM_PROFILEDATA_SAMPLEPROF_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <system_error> +#include <unordered_map> +#include <utility> + +namespace llvm { + +const std::error_category &sampleprof_category(); + +enum class sampleprof_error { + success = 0, + bad_magic, + unsupported_version, + too_large, + truncated, + malformed, + unrecognized_format, + unsupported_writing_format, + truncated_name_table, + not_implemented, + counter_overflow, + ostream_seek_unsupported, + compress_failed, + uncompress_failed, + zlib_unavailable, + hash_mismatch +}; + +inline std::error_code make_error_code(sampleprof_error E) { + return std::error_code(static_cast<int>(E), sampleprof_category()); +} + +inline sampleprof_error MergeResult(sampleprof_error &Accumulator, + sampleprof_error Result) { + // Prefer first error encountered as later errors may be secondary effects of + // the initial problem. 
+ if (Accumulator == sampleprof_error::success && + Result != sampleprof_error::success) + Accumulator = Result; + return Accumulator; +} + +} // end namespace llvm + +namespace std { + +template <> +struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; + +} // end namespace std + +namespace llvm { +namespace sampleprof { + +enum SampleProfileFormat { + SPF_None = 0, + SPF_Text = 0x1, + SPF_Compact_Binary = 0x2, + SPF_GCC = 0x3, + SPF_Ext_Binary = 0x4, + SPF_Binary = 0xff +}; + +static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { + return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | + uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | + uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | + uint64_t('2') << (64 - 56) | uint64_t(Format); +} + +/// Get the proper representation of a string according to whether the +/// current Format uses MD5 to represent the string. +static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, + std::string &GUIDBuf) { + if (Name.empty() || !UseMD5) + return Name; + GUIDBuf = std::to_string(Function::getGUID(Name)); + return GUIDBuf; +} + +static inline uint64_t SPVersion() { return 103; } + +// Section Type used by SampleProfileExtBinaryBaseReader and +// SampleProfileExtBinaryBaseWriter. Never change the existing +// value of enum. Only append new ones. +enum SecType { + SecInValid = 0, + SecProfSummary = 1, + SecNameTable = 2, + SecProfileSymbolList = 3, + SecFuncOffsetTable = 4, + SecFuncMetadata = 5, + SecCSNameTable = 6, + // marker for the first type of profile. 
+ SecFuncProfileFirst = 32, + SecLBRProfile = SecFuncProfileFirst +}; + +static inline std::string getSecName(SecType Type) { + switch ((int)Type) { // Avoid -Wcovered-switch-default + case SecInValid: + return "InvalidSection"; + case SecProfSummary: + return "ProfileSummarySection"; + case SecNameTable: + return "NameTableSection"; + case SecProfileSymbolList: + return "ProfileSymbolListSection"; + case SecFuncOffsetTable: + return "FuncOffsetTableSection"; + case SecFuncMetadata: + return "FunctionMetadata"; + case SecCSNameTable: + return "CSNameTableSection"; + case SecLBRProfile: + return "LBRProfileSection"; + default: + return "UnknownSection"; + } +} + +// Entry type of section header table used by SampleProfileExtBinaryBaseReader +// and SampleProfileExtBinaryBaseWriter. +struct SecHdrTableEntry { + SecType Type; + uint64_t Flags; + uint64_t Offset; + uint64_t Size; + // The index indicating the location of the current entry in + // SectionHdrLayout table. + uint32_t LayoutIndex; +}; + +// Flags common for all sections are defined here. In SecHdrTableEntry::Flags, +// common flags will be saved in the lower 32bits and section specific flags +// will be saved in the higher 32 bits. +enum class SecCommonFlags : uint32_t { + SecFlagInValid = 0, + SecFlagCompress = (1 << 0), + // Indicate the section contains only profile without context. + SecFlagFlat = (1 << 1) +}; + +// Section specific flags are defined here. +// !!!Note: Everytime a new enum class is created here, please add +// a new check in verifySecFlag. +enum class SecNameTableFlags : uint32_t { + SecFlagInValid = 0, + SecFlagMD5Name = (1 << 0), + // Store MD5 in fixed length instead of ULEB128 so NameTable can be + // accessed like an array. + SecFlagFixedLengthMD5 = (1 << 1), + // Profile contains ".__uniq." suffix name. Compiler shouldn't strip + // the suffix when doing profile matching when seeing the flag. 
+ SecFlagUniqSuffix = (1 << 2) +}; +enum class SecProfSummaryFlags : uint32_t { + SecFlagInValid = 0, + /// SecFlagPartial means the profile is for common/shared code. + /// The common profile is usually merged from profiles collected + /// from running other targets. + SecFlagPartial = (1 << 0), + /// SecFlagContext means this is context-sensitive flat profile for + /// CSSPGO + SecFlagFullContext = (1 << 1), + /// SecFlagFSDiscriminator means this profile uses flow-sensitive + /// discriminators. + SecFlagFSDiscriminator = (1 << 2), + /// SecFlagIsCSNested means this is context-sensitive nested profile for + /// CSSPGO + SecFlagIsCSNested = (1 << 4), +}; + +enum class SecFuncMetadataFlags : uint32_t { + SecFlagInvalid = 0, + SecFlagIsProbeBased = (1 << 0), + SecFlagHasAttribute = (1 << 1), +}; + +enum class SecFuncOffsetFlags : uint32_t { + SecFlagInvalid = 0, + // Store function offsets in an order of contexts. The order ensures that + // callee contexts of a given context laid out next to it. + SecFlagOrdered = (1 << 0), +}; + +// Verify section specific flag is used for the correct section. +template <class SecFlagType> +static inline void verifySecFlag(SecType Type, SecFlagType Flag) { + // No verification is needed for common flags. + if (std::is_same<SecCommonFlags, SecFlagType>()) + return; + + // Verification starts here for section specific flag. 
+ bool IsFlagLegal = false; + switch (Type) { + case SecNameTable: + IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); + break; + case SecProfSummary: + IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); + break; + case SecFuncMetadata: + IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); + break; + default: + case SecFuncOffsetTable: + IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); + break; + } + if (!IsFlagLegal) + llvm_unreachable("Misuse of a flag in an incompatible section"); +} + +template <class SecFlagType> +static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { + verifySecFlag(Entry.Type, Flag); + auto FVal = static_cast<uint64_t>(Flag); + bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); + Entry.Flags |= IsCommon ? FVal : (FVal << 32); +} + +template <class SecFlagType> +static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { + verifySecFlag(Entry.Type, Flag); + auto FVal = static_cast<uint64_t>(Flag); + bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); + Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); +} + +template <class SecFlagType> +static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { + verifySecFlag(Entry.Type, Flag); + auto FVal = static_cast<uint64_t>(Flag); + bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); + return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); +} + +/// Represents the relative location of an instruction. +/// +/// Instruction locations are specified by the line offset from the +/// beginning of the function (marked by the line where the function +/// header is) and the discriminator value within that line. +/// +/// The discriminator value is useful to distinguish instructions +/// that are on the same line but belong to different basic blocks +/// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 
+struct LineLocation { + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} + + void print(raw_ostream &OS) const; + void dump() const; + + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Discriminator < O.Discriminator); + } + + bool operator==(const LineLocation &O) const { + return LineOffset == O.LineOffset && Discriminator == O.Discriminator; + } + + bool operator!=(const LineLocation &O) const { + return LineOffset != O.LineOffset || Discriminator != O.Discriminator; + } + + uint32_t LineOffset; + uint32_t Discriminator; +}; + +raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); + +/// Representation of a single sample record. +/// +/// A sample record is represented by a positive integer value, which +/// indicates how frequently was the associated line location executed. +/// +/// Additionally, if the associated location contains a function call, +/// the record will hold a list of all the possible called targets. For +/// direct calls, this will be the exact function being invoked. For +/// indirect calls (function pointers, virtual table dispatch), this +/// will be a list of one or more functions. +class SampleRecord { +public: + using CallTarget = std::pair<StringRef, uint64_t>; + struct CallTargetComparator { + bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { + if (LHS.second != RHS.second) + return LHS.second > RHS.second; + + return LHS.first < RHS.first; + } + }; + + using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; + using CallTargetMap = StringMap<uint64_t>; + SampleRecord() = default; + + /// Increment the number of samples for this record by \p S. + /// Optionally scale sample count \p S by \p Weight. + /// + /// Sample counts accumulate using saturating arithmetic, to avoid wrapping + /// around unsigned integers. 
+ sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { + bool Overflowed; + NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + /// Add called function \p F with samples \p S. + /// Optionally scale sample count \p S by \p Weight. + /// + /// Sample counts accumulate using saturating arithmetic, to avoid wrapping + /// around unsigned integers. + sampleprof_error addCalledTarget(StringRef F, uint64_t S, + uint64_t Weight = 1) { + uint64_t &TargetSamples = CallTargets[F]; + bool Overflowed; + TargetSamples = + SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + /// Return true if this sample record contains function calls. + bool hasCalls() const { return !CallTargets.empty(); } + + uint64_t getSamples() const { return NumSamples; } + const CallTargetMap &getCallTargets() const { return CallTargets; } + const SortedCallTargetSet getSortedCallTargets() const { + return SortCallTargets(CallTargets); + } + + /// Sort call targets in descending order of call frequency. + static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { + SortedCallTargetSet SortedTargets; + for (const auto &I : Targets) { + SortedTargets.emplace(I.first(), I.second); + } + return SortedTargets; + } + + /// Prorate call targets by a distribution factor. + static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, + float DistributionFactor) { + CallTargetMap AdjustedTargets; + for (const auto &I : Targets) { + AdjustedTargets[I.first()] = I.second * DistributionFactor; + } + return AdjustedTargets; + } + + /// Merge the samples in \p Other into this record. + /// Optionally scale sample counts by \p Weight. 
+ sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); + void print(raw_ostream &OS, unsigned Indent) const; + void dump() const; + +private: + uint64_t NumSamples = 0; + CallTargetMap CallTargets; +}; + +raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); + +// State of context associated with FunctionSamples +enum ContextStateMask { + UnknownContext = 0x0, // Profile without context + RawContext = 0x1, // Full context profile from input profile + SyntheticContext = 0x2, // Synthetic context created for context promotion + InlinedContext = 0x4, // Profile for context that is inlined into caller + MergedContext = 0x8 // Profile for context merged into base profile +}; + +// Attribute of context associated with FunctionSamples +enum ContextAttributeMask { + ContextNone = 0x0, + ContextWasInlined = 0x1, // Leaf of context was inlined in previous build + ContextShouldBeInlined = 0x2, // Leaf of context should be inlined +}; + +// Represents a context frame with function name and line location +struct SampleContextFrame { + StringRef FuncName; + LineLocation Location; + + SampleContextFrame() : Location(0, 0) {} + + SampleContextFrame(StringRef FuncName, LineLocation Location) + : FuncName(FuncName), Location(Location) {} + + bool operator==(const SampleContextFrame &That) const { + return Location == That.Location && FuncName == That.FuncName; + } + + bool operator!=(const SampleContextFrame &That) const { + return !(*this == That); + } + + std::string toString(bool OutputLineLocation) const { + std::ostringstream OContextStr; + OContextStr << FuncName.str(); + if (OutputLineLocation) { + OContextStr << ":" << Location.LineOffset; + if (Location.Discriminator) + OContextStr << "." 
<< Location.Discriminator; + } + return OContextStr.str(); + } +}; + +static inline hash_code hash_value(const SampleContextFrame &arg) { + return hash_combine(arg.FuncName, arg.Location.LineOffset, + arg.Location.Discriminator); +} + +using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; +using SampleContextFrames = ArrayRef<SampleContextFrame>; + +struct SampleContextFrameHash { + uint64_t operator()(const SampleContextFrameVector &S) const { + return hash_combine_range(S.begin(), S.end()); + } +}; + +// Sample context for FunctionSamples. It consists of the calling context, +// the function name and context state. Internally sample context is represented +// using ArrayRef, which is also the input for constructing a `SampleContext`. +// It can accept and represent both full context string as well as context-less +// function name. +// For a CS profile, a full context vector can look like: +// `main:3 _Z5funcAi:1 _Z8funcLeafi` +// For a base CS profile without calling context, the context vector should only +// contain the leaf frame name. +// For a non-CS profile, the context vector should be empty. +class SampleContext { +public: + SampleContext() : State(UnknownContext), Attributes(ContextNone) {} + + SampleContext(StringRef Name) + : Name(Name), State(UnknownContext), Attributes(ContextNone) {} + + SampleContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!Context.empty() && "Context is empty"); + setContext(Context, CState); + } + + // Give a context string, decode and populate internal states like + // Function name, Calling context and context state. 
Example of input + // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` + SampleContext(StringRef ContextStr, + std::list<SampleContextFrameVector> &CSNameTable, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!ContextStr.empty()); + // Note that `[]` wrapped input indicates a full context string, otherwise + // it's treated as context-less function name only. + bool HasContext = ContextStr.startswith("["); + if (!HasContext) { + State = UnknownContext; + Name = ContextStr; + } else { + CSNameTable.emplace_back(); + SampleContextFrameVector &Context = CSNameTable.back(); + createCtxVectorFromStr(ContextStr, Context); + setContext(Context, CState); + } + } + + /// Create a context vector from a given context string and save it in + /// `Context`. + static void createCtxVectorFromStr(StringRef ContextStr, + SampleContextFrameVector &Context) { + // Remove encapsulating '[' and ']' if any + ContextStr = ContextStr.substr(1, ContextStr.size() - 2); + StringRef ContextRemain = ContextStr; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = ContextRemain.split(" @ "); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation CallSiteLoc(0, 0); + decodeContextString(ChildContext, CalleeName, CallSiteLoc); + Context.emplace_back(CalleeName, CallSiteLoc); + } + } + + // Promote context by removing top frames with the length of + // `ContextFramesToRemove`. Note that with array representation of context, + // the promotion is effectively a slice operation with first + // `ContextFramesToRemove` elements removed from left. + void promoteOnPath(uint32_t ContextFramesToRemove) { + assert(ContextFramesToRemove <= FullContext.size() && + "Cannot remove more than the whole context"); + FullContext = FullContext.drop_front(ContextFramesToRemove); + } + + // Decode context string for a frame to get function name and location. 
+ // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. + static void decodeContextString(StringRef ContextStr, StringRef &FName, + LineLocation &LineLoc) { + // Get function name + auto EntrySplit = ContextStr.split(':'); + FName = EntrySplit.first; + + LineLoc = {0, 0}; + if (!EntrySplit.second.empty()) { + // Get line offset, use signed int for getAsInteger so string will + // be parsed as signed. + int LineOffset = 0; + auto LocSplit = EntrySplit.second.split('.'); + LocSplit.first.getAsInteger(10, LineOffset); + LineLoc.LineOffset = LineOffset; + + // Get discriminator + if (!LocSplit.second.empty()) + LocSplit.second.getAsInteger(10, LineLoc.Discriminator); + } + } + + operator SampleContextFrames() const { return FullContext; } + bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } + void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } + uint32_t getAllAttributes() { return Attributes; } + void setAllAttributes(uint32_t A) { Attributes = A; } + bool hasState(ContextStateMask S) { return State & (uint32_t)S; } + void setState(ContextStateMask S) { State |= (uint32_t)S; } + void clearState(ContextStateMask S) { State &= (uint32_t)~S; } + bool hasContext() const { return State != UnknownContext; } + bool isBaseContext() const { return FullContext.size() == 1; } + StringRef getName() const { return Name; } + SampleContextFrames getContextFrames() const { return FullContext; } + + static std::string getContextString(SampleContextFrames Context, + bool IncludeLeafLineLocation = false) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < Context.size(); I++) { + if (OContextStr.str().size()) { + OContextStr << " @ "; + } + OContextStr << Context[I].toString(I != Context.size() - 1 || + IncludeLeafLineLocation); + } + return OContextStr.str(); + } + + std::string toString() const { + if (!hasContext()) + return Name.str(); + return getContextString(FullContext, false); + } + + uint64_t 
getHashCode() const { + return hasContext() ? hash_value(getContextFrames()) + : hash_value(getName()); + } + + /// Set the name of the function and clear the current context. + /// The state is reset to UnknownContext, so hasContext() reports false + /// afterwards. + void setName(StringRef FunctionName) { + Name = FunctionName; + FullContext = SampleContextFrames(); + State = UnknownContext; + } + + /// Install \p Context as the full context with state \p CState, which must + /// not be UnknownContext. The name is taken from the last frame of + /// \p Context. + /// NOTE(review): Context.back() is read unconditionally, so a non-empty + /// \p Context is presumably required -- confirm against callers. + void setContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) { + assert(CState != UnknownContext); + FullContext = Context; + Name = Context.back().FuncName; + State = CState; + } + + /// Equality looks at State, Name and FullContext; Attributes are not + /// compared. + bool operator==(const SampleContext &That) const { + return State == That.State && Name == That.Name && + FullContext == That.FullContext; + } + + bool operator!=(const SampleContext &That) const { return !(*this == That); } + + /// Ordering: State is compared first. Contexts without calling context are + /// then ordered by Name; otherwise frames are compared pairwise (FuncName, + /// then Location), and if all shared frames are equal the shorter context + /// sorts first. + /// NOTE(review): the `== -1` tests assume compare() yields exactly -1 for + /// "less than" -- confirm with the StringRef::compare documentation. + bool operator<(const SampleContext &That) const { + if (State != That.State) + return State < That.State; + + if (!hasContext()) { + return (Name.compare(That.Name)) == -1; + } + + uint64_t I = 0; + while (I < std::min(FullContext.size(), That.FullContext.size())) { + auto &Context1 = FullContext[I]; + auto &Context2 = That.FullContext[I]; + auto V = Context1.FuncName.compare(Context2.FuncName); + if (V) + return V == -1; + if (Context1.Location != Context2.Location) + return Context1.Location < Context2.Location; + I++; + } + + return FullContext.size() < That.FullContext.size(); + } + + /// Hash functor that forwards to getHashCode(). + struct Hash { + uint64_t operator()(const SampleContext &Context) const { + return Context.getHashCode(); + } + }; + + /// Return true if this context is a prefix of \p That. \p That is first + /// truncated to this context's length; of the last frame in that range only + /// FuncName is checked (its Location is not compared), while all preceding + /// frames must compare equal. + /// NOTE(review): back() is called on this context's frames, so an empty + /// FullContext is presumably not expected here -- confirm. + bool IsPrefixOf(const SampleContext &That) const { + auto ThisContext = FullContext; + auto ThatContext = That.FullContext; + if (ThatContext.size() < ThisContext.size()) + return false; + ThatContext = ThatContext.take_front(ThisContext.size()); + // Compare Leaf frame first + if (ThisContext.back().FuncName != ThatContext.back().FuncName) + return false; + // Compare leading context + return ThisContext.drop_back() == ThatContext.drop_back(); + } + +private: + /// Mangled name of the function. 
+ StringRef Name; + // Full context including calling context and leaf function name + SampleContextFrames FullContext; + // State of the associated sample profile + uint32_t State; + // Attribute of the associated sample profile + uint32_t Attributes; +}; + +static inline hash_code hash_value(const SampleContext &arg) { + return arg.hasContext() ? hash_value(arg.getContextFrames()) + : hash_value(arg.getName()); +} + +class FunctionSamples; +class SampleProfileReaderItaniumRemapper; + +using BodySampleMap = std::map<LineLocation, SampleRecord>; +// NOTE: Using a StringMap here makes parsed profiles consume around 17% more +// memory, which is *very* significant for large profiles. +using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>; +using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; + +/// Representation of the samples collected for a function. +/// +/// This data structure contains all the collected samples for the body +/// of a function. Each sample corresponds to a LineLocation instance +/// within the body of the function. +class FunctionSamples { +public: + FunctionSamples() = default; + + void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; + void dump() const; + + sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + TotalSamples = + SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + TotalHeadSamples = + SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); + return Overflowed ? 
sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, + uint64_t Num, uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( + Num, Weight); + } + + sampleprof_error addCalledTargetSamples(uint32_t LineOffset, + uint32_t Discriminator, + StringRef FName, uint64_t Num, + uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( + FName, Num, Weight); + } + + sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, + uint64_t Weight = 1) { + SampleRecord S; + S.addSamples(Num, Weight); + return BodySamples[LineLocation(Index, 0)].merge(S, Weight); + } + + // Accumulate all body samples to set total samples. + void updateTotalSamples() { + setTotalSamples(0); + for (const auto &I : BodySamples) + addTotalSamples(I.second.getSamples()); + + for (auto &I : CallsiteSamples) { + for (auto &CS : I.second) { + CS.second.updateTotalSamples(); + addTotalSamples(CS.second.getTotalSamples()); + } + } + } + + // Set current context and all callee contexts to be synthetic. + void SetContextSynthetic() { + Context.setState(SyntheticContext); + for (auto &I : CallsiteSamples) { + for (auto &CS : I.second) { + CS.second.SetContextSynthetic(); + } + } + } + + /// Return the number of samples collected at the given location. + /// Each location is specified by \p LineOffset and \p Discriminator. + /// If the location is not found in profile, return error. + ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, + uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + return ret->second.getSamples(); + } + + /// Returns the call target map collected at a given location. + /// Each location is specified by \p LineOffset and \p Discriminator. 
+ /// If the location is not found in profile, return error. + ErrorOr<SampleRecord::CallTargetMap> + findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + return ret->second.getCallTargets(); + } + + /// Returns the call target map collected at a given location specified by \p + /// CallSite. If the location is not found in profile, return error. + ErrorOr<SampleRecord::CallTargetMap> + findCallTargetMapAt(const LineLocation &CallSite) const { + const auto &Ret = BodySamples.find(CallSite); + if (Ret == BodySamples.end()) + return std::error_code(); + return Ret->second.getCallTargets(); + } + + /// Return the function samples at the given callsite location. + FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { + return CallsiteSamples[Loc]; + } + + /// Returns the FunctionSamplesMap at the given \p Loc. + const FunctionSamplesMap * + findFunctionSamplesMapAt(const LineLocation &Loc) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + return &iter->second; + } + + /// Returns a pointer to FunctionSamples at the given callsite location + /// \p Loc with callee \p CalleeName. If no callsite can be found, relax + /// the restriction to return the FunctionSamples at callsite location + /// \p Loc with the maximum total sample count. If \p Remapper is not + /// nullptr, use \p Remapper to find FunctionSamples with equivalent name + /// as \p CalleeName. + const FunctionSamples * + findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper) const; + + bool empty() const { return TotalSamples == 0; } + + /// Return the total number of samples collected inside the function. 
+ uint64_t getTotalSamples() const { return TotalSamples; } + + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample of the first + /// instruction of the symbol. But as we directly get this info for raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. + uint64_t getHeadSamples() const { return TotalHeadSamples; } + + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. + uint64_t getEntrySamples() const { + if (FunctionSamples::ProfileIsCSFlat && getHeadSamples()) { + // For CS profile, if we already have more accurate head samples + // counted by branch sample from caller, use them as entry samples. + return getHeadSamples(); + } + uint64_t Count = 0; + // Use either BodySamples or CallsiteSamples which ever has the smaller + // lineno. + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + Count = BodySamples.begin()->second.getSamples(); + else if (!CallsiteSamples.empty()) { + // An indirect callsite may be promoted to several inlined direct calls. + // We need to get the sum of them. + for (const auto &N_FS : CallsiteSamples.begin()->second) + Count += N_FS.second.getEntrySamples(); + } + // Return at least 1 if total sample is not 0. + return Count ? Count : TotalSamples > 0; + } + + /// Return all the samples collected in the body of the function. + const BodySampleMap &getBodySamples() const { return BodySamples; } + + /// Return all the callsite samples collected in the body of the function. + const CallsiteSampleMap &getCallsiteSamples() const { + return CallsiteSamples; + } + + /// Return the maximum of sample counts in a function body including functions + /// inlined in it. 
+ uint64_t getMaxCountInside() const { + uint64_t MaxCount = 0; + for (const auto &L : getBodySamples()) + MaxCount = std::max(MaxCount, L.second.getSamples()); + for (const auto &C : getCallsiteSamples()) + for (const FunctionSamplesMap::value_type &F : C.second) + MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); + return MaxCount; + } + + /// Merge the samples in \p Other into this one. + /// Optionally scale samples by \p Weight. + sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { + sampleprof_error Result = sampleprof_error::success; + if (!GUIDToFuncNameMap) + GUIDToFuncNameMap = Other.GUIDToFuncNameMap; + if (Context.getName().empty()) + Context = Other.getContext(); + if (FunctionHash == 0) { + // Set the function hash code for the target profile. + FunctionHash = Other.getFunctionHash(); + } else if (FunctionHash != Other.getFunctionHash()) { + // The two profiles coming with different valid hash codes indicates + // either: + // 1. They are same-named static functions from different compilation + // units (without using -unique-internal-linkage-names), or + // 2. They are really the same function but from different compilations. + // Let's bail out in either case for now, which means one profile is + // dropped. 
+ return sampleprof_error::hash_mismatch; + } + + MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); + MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); + for (const auto &I : Other.getBodySamples()) { + const LineLocation &Loc = I.first; + const SampleRecord &Rec = I.second; + MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); + } + for (const auto &I : Other.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + FunctionSamplesMap &FSMap = functionSamplesAt(Loc); + for (const auto &Rec : I.second) + MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); + } + return Result; + } + + /// Recursively traverses all children, if the total sample count of the + /// corresponding function is no less than \p Threshold, add its corresponding + /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID + /// to \p S. + void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, + const StringMap<Function *> &SymbolMap, + uint64_t Threshold) const { + if (TotalSamples <= Threshold) + return; + auto isDeclaration = [](const Function *F) { + return !F || F->isDeclaration(); + }; + if (isDeclaration(SymbolMap.lookup(getFuncName()))) { + // Add to the import list only when it's defined out of module. + S.insert(getGUID(getName())); + } + // Import hot CallTargets, which may not be available in IR because full + // profile annotation cannot be done until backend compilation in ThinLTO. + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey())); + if (isDeclaration(Callee)) + S.insert(getGUID(TS.getKey())); + } + for (const auto &CS : CallsiteSamples) + for (const auto &NameFS : CS.second) + NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); + } + + /// Set the name of the function. 
+ void setName(StringRef FunctionName) { Context.setName(FunctionName); } + + /// Return the function name. + StringRef getName() const { return Context.getName(); } + + /// Return the original function name. + StringRef getFuncName() const { return getFuncName(getName()); } + + void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } + + uint64_t getFunctionHash() const { return FunctionHash; } + + /// Return the canonical name for a function, taking into account + /// suffix elision policy attributes. + static StringRef getCanonicalFnName(const Function &F) { + auto AttrName = "sample-profile-suffix-elision-policy"; + auto Attr = F.getFnAttribute(AttrName).getValueAsString(); + return getCanonicalFnName(F.getName(), Attr); + } + + /// Name suffixes which canonicalization should handle to avoid + /// profile mismatch. + static constexpr const char *LLVMSuffix = ".llvm."; + static constexpr const char *PartSuffix = ".part."; + static constexpr const char *UniqSuffix = ".__uniq."; + + static StringRef getCanonicalFnName(StringRef FnName, + StringRef Attr = "selected") { + // Note the sequence of the suffixes in the knownSuffixes array matters. + // If suffix "A" is appended after the suffix "B", "A" should be in front + // of "B" in knownSuffixes. + const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; + if (Attr == "" || Attr == "all") { + return FnName.split('.').first; + } else if (Attr == "selected") { + StringRef Cand(FnName); + for (const auto &Suf : knownSuffixes) { + StringRef Suffix(Suf); + // If the profile contains ".__uniq." suffix, don't strip the + // suffix for names in the IR. 
+ if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) + continue; + auto It = Cand.rfind(Suffix); + if (It == StringRef::npos) + continue; + auto Dit = Cand.rfind('.'); + if (Dit == It + Suffix.size() - 1) + Cand = Cand.substr(0, It); + } + return Cand; + } else if (Attr == "none") { + return FnName; + } else { + assert(false && "internal error: unknown suffix elision policy"); + } + return FnName; + } + + /// Translate \p Name into its original name. + /// When profile doesn't use MD5, \p Name needs no translation. + /// When profile uses MD5, \p Name in current FunctionSamples + /// is actually GUID of the original function name. getFuncName will + /// translate \p Name in current FunctionSamples into its original name + /// by looking up in the function map GUIDToFuncNameMap. + /// If the original name doesn't exist in the map, return empty StringRef. + /// NOTE(review): in the MD5 case Name.data() is handed to std::stoull as a + /// C string, so this presumably relies on \p Name being NUL-terminated and + /// holding a decimal number -- confirm how MD5 names are stored. + StringRef getFuncName(StringRef Name) const { + if (!UseMD5) + return Name; + + assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); + return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); + } + + /// Returns the line offset to the start line of the subprogram. + /// We assume that a single function will not exceed 65535 LOC. + static unsigned getOffset(const DILocation *DIL); + + /// Returns a unique call site identifier for a given debug location of a call + /// instruction. This is a wrapper of two scenarios, the probe-based profile and + /// regular profile, to hide implementation details from the sample loader and + /// the context tracker. + static LineLocation getCallSiteIdentifier(const DILocation *DIL, + bool ProfileIsFS = false); + + /// Returns a unique hash code for a combination of a callsite location and + /// the callee function name. + static uint64_t getCallSiteHash(StringRef CalleeName, + const LineLocation &Callsite); + + /// Get the FunctionSamples of the inline instance where DIL originates + /// from. 
+ /// + /// The FunctionSamples of the instruction (Machine or IR) associated to + /// \p DIL is the inlined instance in which that instruction is coming from. + /// We traverse the inline stack of that instruction, and match it with the + /// tree nodes in the profile. + /// + /// \returns the FunctionSamples pointer to the inlined instance. + /// If \p Remapper is not nullptr, it will be used to find matching + /// FunctionSamples with not exactly the same but equivalent name. + const FunctionSamples *findFunctionSamples( + const DILocation *DIL, + SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; + + static bool ProfileIsProbeBased; + + static bool ProfileIsCSFlat; + + static bool ProfileIsCSNested; + + SampleContext &getContext() const { return Context; } + + void setContext(const SampleContext &FContext) { Context = FContext; } + + static SampleProfileFormat Format; + + /// Whether the profile uses MD5 to represent string. + static bool UseMD5; + + /// Whether the profile contains any ".__uniq." suffix in a name. + static bool HasUniqSuffix; + + /// If this profile uses flow sensitive discriminators. + static bool ProfileIsFS; + + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + /// all the function symbols defined or declared in current module. + DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; + + // Assume the input \p Name is a name coming from FunctionSamples itself. + // If UseMD5 is true, the name is already a GUID and we + // don't want to return the GUID of GUID. + static uint64_t getGUID(StringRef Name) { + return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); + } + + // Find all the names in the current FunctionSamples including names in + // all the inline instances and names of call targets. + void findAllNames(DenseSet<StringRef> &NameSet) const; + +private: + /// CFG hash value for the function. 
+ uint64_t FunctionHash = 0; + + /// Calling context for function profile + mutable SampleContext Context; + + /// Total number of samples collected inside this function. + /// + /// Samples are cumulative, they include all the samples collected + /// inside this function and all its inlined callees. + uint64_t TotalSamples = 0; + + /// Total number of samples collected at the head of the function. + /// This is an approximation of the number of calls made to this function + /// at runtime. + uint64_t TotalHeadSamples = 0; + + /// Map instruction locations to collected samples. + /// + /// Each entry in this map contains the number of samples + /// collected at the corresponding line offset. All line locations + /// are an offset from the start of the function. + BodySampleMap BodySamples; + + /// Map call sites to collected samples for the called function. + /// + /// Each entry in this map corresponds to all the samples + /// collected for the inlined function call at the given + /// location. For example, given: + /// + /// void foo() { + /// 1 bar(); + /// ... + /// 8 baz(); + /// } + /// + /// If the bar() and baz() calls were inlined inside foo(), this + /// map will contain two entries. One for all the samples collected + /// in the call to bar() at line offset 1, the other for all the samples + /// collected in the call to baz() at line offset 8. + CallsiteSampleMap CallsiteSamples; +}; + +raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); + +using SampleProfileMap = + std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>; + +using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>; + +void sortFuncProfiles(const SampleProfileMap &ProfileMap, + std::vector<NameFunctionSamples> &SortedProfiles); + +/// Sort a LocationT->SampleT map by LocationT. +/// +/// It produces a sorted list of <LocationT, SampleT> records by ascending +/// order of LocationT. 
+template <class LocationT, class SampleT> class SampleSorter { +public: + using SamplesWithLoc = std::pair<const LocationT, SampleT>; + using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; + + SampleSorter(const std::map<LocationT, SampleT> &Samples) { + for (const auto &I : Samples) + V.push_back(&I); + llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { + return A->first < B->first; + }); + } + + const SamplesWithLocList &get() const { return V; } + +private: + SamplesWithLocList V; +}; + +/// SampleContextTrimmer impelements helper functions to trim, merge cold +/// context profiles. It also supports context profile canonicalization to make +/// sure ProfileMap's key is consistent with FunctionSample's name/context. +class SampleContextTrimmer { +public: + SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; + // Trim and merge cold context profile when requested. TrimBaseProfileOnly + // should only be effective when TrimColdContext is true. On top of + // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all + // cold profiles or only cold base profiles. Trimming base profiles only is + // mainly to honor the preinliner decsion. Note that when MergeColdContext is + // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will + // be ignored. + void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, + bool TrimColdContext, + bool MergeColdContext, + uint32_t ColdContextFrameLength, + bool TrimBaseProfileOnly); + // Canonicalize context profile name and attributes. + void canonicalizeContextProfiles(); + +private: + SampleProfileMap &ProfileMap; +}; + +// CSProfileConverter converts a full context-sensitive flat sample profile into +// a nested context-sensitive sample profile. 
+class CSProfileConverter { +public: + CSProfileConverter(SampleProfileMap &Profiles); + void convertProfiles(); + struct FrameNode { + FrameNode(StringRef FName = StringRef(), + FunctionSamples *FSamples = nullptr, + LineLocation CallLoc = {0, 0}) + : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; + + // Map line+discriminator location to child frame + // NOTE(review): the key is a plain uint64_t; presumably it is the value + // produced by FunctionSamples::getCallSiteHash() -- confirm in the + // implementation of getOrCreateChildFrame. + std::map<uint64_t, FrameNode> AllChildFrames; + // Function name for current frame + StringRef FuncName; + // Function Samples for current frame + FunctionSamples *FuncSamples; + // Callsite location in parent context + LineLocation CallSiteLoc; + + FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, + StringRef CalleeName); + }; + +private: + // Nest all children profiles into the profile of Node. + void convertProfiles(FrameNode &Node); + FrameNode *getOrCreateContextPath(const SampleContext &Context); + + SampleProfileMap &ProfileMap; + FrameNode RootFrame; +}; + +/// ProfileSymbolList records the list of function symbols that show up +/// in the binary used to generate the profile. It is useful +/// to discriminate between a function being so cold that it does not show up +/// in the profile and a function newly added. +class ProfileSymbolList { +public: + /// copy indicates whether we need to copy the underlying memory + /// for the input Name. When it is false the StringRef is inserted as-is; + /// when it is true a copy made with Name.copy(Allocator) is inserted. 
+ void add(StringRef Name, bool copy = false) { + if (!copy) { + Syms.insert(Name); + return; + } + Syms.insert(Name.copy(Allocator)); + } + + bool contains(StringRef Name) { return Syms.count(Name); } + + void merge(const ProfileSymbolList &List) { + for (auto Sym : List.Syms) + add(Sym, true); + } + + unsigned size() { return Syms.size(); } + + void setToCompress(bool TC) { ToCompress = TC; } + bool toCompress() { return ToCompress; } + + std::error_code read(const uint8_t *Data, uint64_t ListSize); + std::error_code write(raw_ostream &OS); + void dump(raw_ostream &OS = dbgs()) const; + +private: + // Determine whether or not to compress the symbol list when + // writing it into profile. The variable is unused when the symbol + // list is read from an existing profile. + bool ToCompress = false; + DenseSet<StringRef> Syms; + BumpPtrAllocator Allocator; +}; + +} // end namespace sampleprof + +using namespace sampleprof; +// Provide DenseMapInfo for SampleContext. +// The empty key is a default-constructed SampleContext and the tombstone key +// is the context-less name "@"; hashing and equality forward to +// getHashCode() and operator==. +template <> struct DenseMapInfo<SampleContext> { + static inline SampleContext getEmptyKey() { return SampleContext(); } + + static inline SampleContext getTombstoneKey() { return SampleContext("@"); } + + static unsigned getHashValue(const SampleContext &Val) { + return Val.getHashCode(); + } + + static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { + return LHS == RHS; + } +}; +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROF_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfReader.h b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfReader.h new file mode 100644 index 0000000000..6b34a108c1 --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfReader.h @@ -0,0 +1,914 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for reading sample profiles. +// +// NOTE: If you are making changes to this file format, please remember +// to document them in the Clang documentation at +// tools/clang/docs/UsersManual.rst. +// +// Text format +// ----------- +// +// Sample profiles are written as ASCII text. The file is divided into +// sections, which correspond to each of the functions executed at runtime. +// Each section has the following format +// +// function1:total_samples:total_head_samples +// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] +// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] +// ... +// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] +// offsetA[.discriminator]: fnA:num_of_total_samples +// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ] +// ... +// !CFGChecksum: num +// !Attribute: flags +// +// This is a nested tree in which the indentation represents the nesting level +// of the inline stack. There are no blank lines in the file. And the spacing +// within a single line is fixed. Additional spaces will result in an error +// while reading the file. +// +// Any line starting with the '#' character is completely ignored. +// +// Inlined calls are represented with indentation. The Inline stack is a +// stack of source locations in which the top of the stack represents the +// leaf function, and the bottom of the stack represents the actual +// symbol to which the instruction belongs. +// +// Function names must be mangled in order for the profile loader to +// match them in the current translation unit. 
The two numbers in the +// function header specify how many total samples were accumulated in the +// function (first number), and the total number of samples accumulated +// in the prologue of the function (second number). This head sample +// count provides an indicator of how frequently the function is invoked. +// +// There are three types of lines in the function body. +// +// * Sampled line represents the profile information of a source location. +// * Callsite line represents the profile information of a callsite. +// * Metadata line represents extra metadata of the function. +// +// Each sampled line may contain several items. Some are optional (marked +// below): +// +// a. Source line offset. This number represents the line number +// in the function where the sample was collected. The line number is +// always relative to the line where symbol of the function is +// defined. So, if the function has its header at line 280, the offset +// 13 is at line 293 in the file. +// +// Note that this offset should never be a negative number. This could +// happen in cases like macros. The debug machinery will register the +// line number at the point of macro expansion. So, if the macro was +// expanded in a line before the start of the function, the profile +// converter should emit a 0 as the offset (this means that the optimizers +// will not be able to associate a meaningful weight to the instructions +// in the macro). +// +// b. [OPTIONAL] Discriminator. This is used if the sampled program +// was compiled with DWARF discriminator support +// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). +// DWARF discriminators are unsigned integer values that allow the +// compiler to distinguish between multiple execution paths on the +// same source line location. +// +// For example, consider the line of code ``if (cond) foo(); else bar();``. 
+// If the predicate ``cond`` is true 80% of the time, then the edge +// into function ``foo`` should be considered to be taken most of the +// time. But both calls to ``foo`` and ``bar`` are at the same source +// line, so a sample count at that line is not sufficient. The +// compiler needs to know which part of that line is taken more +// frequently. +// +// This is what discriminators provide. In this case, the calls to +// ``foo`` and ``bar`` will be at the same line, but will have +// different discriminator values. This allows the compiler to correctly +// set edge weights into ``foo`` and ``bar``. +// +// c. Number of samples. This is an integer quantity representing the +// number of samples collected by the profiler at this source +// location. +// +// d. [OPTIONAL] Potential call targets and samples. If present, this +// line contains a call instruction. This models both direct and +// indirect calls. Each called target is listed together with the +// number of samples. For example, +// +// 130: 7 foo:3 bar:2 baz:7 +// +// The above means that at relative line offset 130 there is a call +// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, +// with ``baz()`` being the relatively more frequently called target. +// +// Each callsite line may contain several items. Some are optional. +// +// a. Source line offset. This number represents the line number of the +// callsite that is inlined in the profiled binary. +// +// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line. +// +// c. Number of samples. This is an integer quantity representing the +// total number of samples collected for the inlined instance at this +// callsite. +// +// Metadata line can occur in lines with one indent only, containing extra +// information for the top-level function. Furthermore, metadata can only +// occur after all the body samples and callsite samples. +// Each metadata line may contain a particular type of metadata, marked by +// the starting characters annotated with !.
We process each metadata line +// independently, hence each metadata line has to form an independent piece +// of information that does not require cross-line reference. +// We support the following types of metadata: +// +// a. CFG Checksum (a.k.a. function hash): +// !CFGChecksum: 12345 +// b. Attribute (see ContextAttributeMask): +// !Attribute: 1 +// +// +// Binary format +// ------------- +// +// This is a more compact encoding. Numbers are encoded as ULEB128 values +// and all strings are encoded in a name table. The file is organized in +// the following sections: +// +// MAGIC (uint64_t) +// File identifier computed by function SPMagic() (0x5350524f463432ff) +// +// VERSION (uint32_t) +// File format version number computed by SPVersion() +// +// SUMMARY +// TOTAL_COUNT (uint64_t) +// Total number of samples in the profile. +// MAX_COUNT (uint64_t) +// Maximum value of samples on a line. +// MAX_FUNCTION_COUNT (uint64_t) +// Maximum number of samples at function entry (head samples). +// NUM_COUNTS (uint64_t) +// Number of lines with samples. +// NUM_FUNCTIONS (uint64_t) +// Number of functions with samples. +// NUM_DETAILED_SUMMARY_ENTRIES (size_t) +// Number of entries in detailed summary +// DETAILED_SUMMARY +// A list of detailed summary entries. Each entry consists of +// CUTOFF (uint32_t) +// Required percentile of total sample count expressed as a fraction +// multiplied by 1000000. +// MIN_COUNT (uint64_t) +// The minimum number of samples required to reach the target +// CUTOFF. +// NUM_COUNTS (uint64_t) +// Number of samples to get to the desired percentile. +// +// NAME TABLE +// SIZE (uint32_t) +// Number of entries in the name table. +// NAMES +// A NUL-separated list of SIZE strings. +// +// FUNCTION BODY (one for each uninlined function body present in the profile) +// HEAD_SAMPLES (uint64_t) [only for top-level functions] +// Total number of samples collected at the head (prologue) of the +// function.
+// NOTE: This field should only be present for top-level functions +// (i.e., not inlined into any caller). Inlined function calls +// have no prologue, so they don't need this. +// NAME_IDX (uint32_t) +// Index into the name table indicating the function name. +// SAMPLES (uint64_t) +// Total number of samples collected in this function. +// NRECS (uint32_t) +// Total number of sampling records in this function's profile. +// BODY RECORDS +// A list of NRECS entries. Each entry contains: +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// SAMPLES (uint64_t) +// Number of samples collected at this location. +// NUM_CALLS (uint32_t) +// Number of non-inlined function calls made at this location. In the +// case of direct calls, this number will always be 1. For indirect +// calls (virtual functions and function pointers) this will +// represent all the actual functions called at runtime. +// CALL_TARGETS +// A list of NUM_CALLS entries for each called function: +// NAME_IDX (uint32_t) +// Index into the name table with the callee name. +// SAMPLES (uint64_t) +// Number of samples collected at the call site. +// NUM_INLINED_FUNCTIONS (uint32_t) +// Number of callees inlined into this function. +// INLINED FUNCTION RECORDS +// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined +// callees. +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// FUNCTION BODY +// A FUNCTION BODY entry describing the inlined function.
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H +#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/GCOV.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Discriminator.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SymbolRemappingReader.h" +#include <algorithm> +#include <cstdint> +#include <list> +#include <memory> +#include <string> +#include <system_error> +#include <unordered_set> +#include <vector> + +namespace llvm { + +class raw_ostream; +class Twine; + +namespace sampleprof { + +class SampleProfileReader; + +/// SampleProfileReaderItaniumRemapper remaps the profile data from a +/// sample profile data reader, by applying a provided set of equivalences +/// between components of the symbol names in the profile. +class SampleProfileReaderItaniumRemapper { +public: + /// Construct a remapper that takes ownership of \p B and \p SRR and keeps a + /// reference to \p R. \p SRR must not be null; this is asserted. + SampleProfileReaderItaniumRemapper(std::unique_ptr<MemoryBuffer> B, + std::unique_ptr<SymbolRemappingReader> SRR, + SampleProfileReader &R) + : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) { + assert(Remappings && "Remappings cannot be nullptr"); + } + + /// Create a remapper from the given remapping file. The remapper will + /// be used for the profile read in by \p Reader. + static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> + create(const std::string Filename, SampleProfileReader &Reader, + LLVMContext &C); + + /// Create a remapper from the given Buffer. The remapper will + /// be used for the profile read in by \p Reader.
+ static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> + create(std::unique_ptr<MemoryBuffer> &B, SampleProfileReader &Reader, + LLVMContext &C); + + /// Apply remappings to the profile read by Reader. + void applyRemapping(LLVMContext &Ctx); + + /// Return whether remapping has already been applied to the profile (the + /// RemappingApplied flag). + bool hasApplied() { return RemappingApplied; } + + /// Insert function name into remapper. + void insert(StringRef FunctionName) { Remappings->insert(FunctionName); } + + /// Query whether an equivalent of \p FunctionName has been inserted into + /// the remapper. + bool exist(StringRef FunctionName) { + return Remappings->lookup(FunctionName); + } + + /// Return the equivalent name in the profile for \p FunctionName if + /// it exists. + Optional<StringRef> lookUpNameInProfile(StringRef FunctionName); + +private: + // The buffer holding the content read from remapping file. + std::unique_ptr<MemoryBuffer> Buffer; + // The symbol remapping reader. The constructor asserts it is non-null. + std::unique_ptr<SymbolRemappingReader> Remappings; + // Map remapping key to the name in the profile. By looking up the + // key in the remapper, a given new name can be mapped to the + // canonical name using the NameMap. + DenseMap<SymbolRemappingReader::Key, StringRef> NameMap; + // The Reader the remapper is servicing. + SampleProfileReader &Reader; + // Indicate whether remapping has been applied to the profile read + // by Reader -- by calling applyRemapping. + bool RemappingApplied = false; +}; + +/// Sample-based profile reader. +/// +/// Each profile contains sample counts for all the functions +/// executed. Inside each function, statements are annotated with the +/// collected samples on all the instructions associated with that +/// statement. +/// +/// For this to produce meaningful data, the program needs to be +/// compiled with some debug information (at minimum, line numbers: +/// -gline-tables-only). Otherwise, it will be impossible to match IR +/// instructions to the line numbers collected by the profiler.
+/// +/// From the profile file, we are interested in collecting the +/// following information: +/// +/// * A list of functions included in the profile (mangled names). +/// +/// * For each function F: +/// 1. The total number of samples collected in F. +/// +/// 2. The samples collected at each line in F. To provide some +/// protection against source code shuffling, line numbers should +/// be relative to the start of the function. +/// +/// The reader supports two file formats: text and binary. The text format +/// is useful for debugging and testing, while the binary format is more +/// compact and I/O efficient. They can both be used interchangeably. +class SampleProfileReader { +public: + /// Construct a reader over \p B (ownership is taken) that emits its + /// diagnostics through \p C. + SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_None) + : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {} + + virtual ~SampleProfileReader() = default; + + /// Read and validate the file header. + virtual std::error_code readHeader() = 0; + + /// Set the bits for FS discriminators. Parameter \p P specifies the sequence + /// number: pass i is for the i-th round of adding FS discriminators, and + /// pass 0 is for using base discriminators. The value stored in + /// MaskedBitFrom is getFSPassBitEnd(P). + void setDiscriminatorMaskedBitFrom(FSDiscriminatorPass P) { + MaskedBitFrom = getFSPassBitEnd(P); + } + + /// Get the bitmask for the discriminators: For FS profiles, return the bit + /// mask for this pass. For non FS profiles, return (unsigned) -1. + uint32_t getDiscriminatorMask() const { + if (!ProfileIsFS) + return 0xFFFFFFFF; + assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly"); + return getN1Bits(MaskedBitFrom); + } + + /// The interface to read sample profiles from the associated file. + /// Calls readImpl(); on success the remapper (if any) is applied and + /// FunctionSamples::UseMD5 is updated from useMD5().
+ std::error_code read() { + if (std::error_code EC = readImpl()) + return EC; + if (Remapper) + Remapper->applyRemapping(Ctx); + FunctionSamples::UseMD5 = useMD5(); + return sampleprof_error::success; + } + + /// The implementation to read sample profiles from the associated file. + virtual std::error_code readImpl() = 0; + + /// Print the profile for \p FContext on stream \p OS. + void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs()); + + /// Collect functions with definitions in Module M. For readers which + /// support loading function profiles on demand, return true when the + /// reader has been given a module. Always return false for readers + /// which don't support loading function profiles on demand. + virtual bool collectFuncsFromModule() { return false; } + + /// Print all the profiles on stream \p OS. + void dump(raw_ostream &OS = dbgs()); + + /// Return the samples collected for function \p F. + FunctionSamples *getSamplesFor(const Function &F) { + // The function name may have been updated by adding suffix. Call + // a helper to (optionally) strip off suffixes so that we can + // match against the original function name in the profile. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + return getSamplesFor(CanonName); + } + + /// Return the samples collected for function \p F, create empty + /// FunctionSamples if it doesn't exist. + FunctionSamples *getOrCreateSamplesFor(const Function &F) { + std::string FGUID; + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + CanonName = getRepInFormat(CanonName, useMD5(), FGUID); + auto It = Profiles.find(CanonName); + if (It != Profiles.end()) + return &It->second; + if (!FGUID.empty()) { + assert(useMD5() && "New name should only be generated for md5 profile"); + // Store the generated name in MD5NameBuffer so that the StringRef used + // as the map key keeps pointing at live storage. + CanonName = *MD5NameBuffer.insert(FGUID).first; + } + return &Profiles[CanonName]; + } + + /// Return the samples collected for the function named \p Fname, or nullptr + /// if there are none. The remapper, when present, is consulted as a fallback.
+ virtual FunctionSamples *getSamplesFor(StringRef Fname) { + std::string FGUID; + Fname = getRepInFormat(Fname, useMD5(), FGUID); + auto It = Profiles.find(Fname); + if (It != Profiles.end()) + return &It->second; + + if (Remapper) { + if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) { + auto It = Profiles.find(*NameInProfile); + if (It != Profiles.end()) + return &It->second; + } + } + return nullptr; + } + + /// Return all the profiles. + SampleProfileMap &getProfiles() { return Profiles; } + + /// Report a parse error message for line \p LineNumber of the profile + /// buffer through the LLVM context. + void reportError(int64_t LineNumber, const Twine &Msg) const { + Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(), + LineNumber, Msg)); + } + + /// Create a sample profile reader appropriate to the file format. + /// Also create the underlying remapper if RemapFilename is not empty. + /// Parameter P specifies the FSDiscriminatorPass. + static ErrorOr<std::unique_ptr<SampleProfileReader>> + create(const std::string Filename, LLVMContext &C, + FSDiscriminatorPass P = FSDiscriminatorPass::Base, + const std::string RemapFilename = ""); + + /// Create a sample profile reader from the supplied memory buffer. + /// Also create the underlying remapper if RemapFilename is not empty. + /// Parameter P specifies the FSDiscriminatorPass. + static ErrorOr<std::unique_ptr<SampleProfileReader>> + create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, + FSDiscriminatorPass P = FSDiscriminatorPass::Base, + const std::string RemapFilename = ""); + + /// Return the profile summary. The Summary pointer is dereferenced + /// unchecked, so it must have been set before this is called. + ProfileSummary &getSummary() const { return *(Summary.get()); } + + /// Return the memory buffer holding the profile; ownership stays with the + /// reader. + MemoryBuffer *getBuffer() const { return Buffer.get(); } + + /// \brief Return the profile format. + SampleProfileFormat getFormat() const { return Format; } + + /// Whether input profile is based on pseudo probes. + bool profileIsProbeBased() const { return ProfileIsProbeBased; } + + /// Whether input profile is fully context-sensitive and flat.
+ bool profileIsCSFlat() const { return ProfileIsCSFlat; } + + /// Whether input profile is fully context-sensitive and nested. + bool profileIsCSNested() const { return ProfileIsCSNested; } + + /// Return the profile symbol list. The base implementation has none and + /// returns nullptr. + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() { + return nullptr; + }; + + /// Return the name table. It includes all the names that have samples + /// either in outline instance or inline instance. The base implementation + /// returns nullptr. + virtual std::vector<StringRef> *getNameTable() { return nullptr; } + /// Section-info dump hook. The base implementation prints nothing and + /// returns false. + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; + + /// Return whether names in the profile are all MD5 numbers. + virtual bool useMD5() { return false; } + + /// Don't read profile without context if the flag is set. This is only meaningful + /// for ExtBinary format. + virtual void setSkipFlatProf(bool Skip) {} + /// Return whether any name in the profile contains ".__uniq." suffix. + virtual bool hasUniqSuffix() { return false; } + + /// Return the remapper, or nullptr if there is none. + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } + + /// Set the module being compiled (stored in M). + void setModule(const Module *Mod) { M = Mod; } + +protected: + /// Map every function to its associated profile. + /// + /// The profile of every function executed at runtime is collected + /// in the structure FunctionSamples. This maps function objects + /// to their corresponding profiles. + SampleProfileMap Profiles; + + /// LLVM context used to emit diagnostics. + LLVMContext &Ctx; + + /// Memory buffer holding the profile file. + std::unique_ptr<MemoryBuffer> Buffer; + + /// Extra name buffer holding names created on demand. + /// This should only be needed for md5 profiles. + std::unordered_set<std::string> MD5NameBuffer; + + /// Profile summary information. + std::unique_ptr<ProfileSummary> Summary; + + /// Take ownership of the summary of \p Reader, leaving that reader without + /// one. + static std::unique_ptr<ProfileSummary> + takeSummary(SampleProfileReader &Reader) { + return std::move(Reader.Summary); + } + + /// Compute summary for this profile.
+ void computeSummary(); + + std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper; + + /// \brief Whether samples are collected based on pseudo probes. + bool ProfileIsProbeBased = false; + + /// Whether function profiles are context-sensitive flat profiles. + bool ProfileIsCSFlat = false; + + /// Whether function profiles are context-sensitive nested profiles. + bool ProfileIsCSNested = false; + + /// Number of context-sensitive profiles. + uint32_t CSProfileCount = 0; + + /// Whether the function profiles use FS discriminators. When this is false, + /// getDiscriminatorMask() returns all ones. + bool ProfileIsFS = false; + + /// \brief The format of the sample profile. + SampleProfileFormat Format = SPF_None; + + /// \brief The current module being compiled if SampleProfileReader + /// is used by compiler. If SampleProfileReader is used by other + /// tools which are not compiler, M is usually nullptr. + const Module *M = nullptr; + + /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based). + /// The default is to keep all the bits. + uint32_t MaskedBitFrom = 31; +}; + +/// Reader for the text profile format (SPF_Text). +class SampleProfileReaderText : public SampleProfileReader { +public: + SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C) + : SampleProfileReader(std::move(B), C, SPF_Text) {} + + /// Read and validate the file header. This implementation does no work and + /// always returns success. + std::error_code readHeader() override { return sampleprof_error::success; } + + /// Read sample profiles from the associated file. + std::error_code readImpl() override; + + /// Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + +private: + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients.
+ std::list<SampleContextFrameVector> CSNameTable; +}; + +class SampleProfileReaderBinary : public SampleProfileReader { +public: + SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_None) + : SampleProfileReader(std::move(B), C, Format) {} + + /// Read and validate the file header. + virtual std::error_code readHeader() override; + + /// Read sample profiles from the associated file. + std::error_code readImpl() override; + + /// Return a pointer to NameTable. It includes all the names that have + /// samples either in outline instance or inline instance. + virtual std::vector<StringRef> *getNameTable() override { return &NameTable; } + +protected: + /// Read a numeric value of type T from the profile. + /// + /// If an error occurs during decoding, a diagnostic message is emitted and + /// the error is returned through the ErrorOr result. + /// + /// \returns the read value, or the error. + template <typename T> ErrorOr<T> readNumber(); + + /// Read a numeric value of type T from the profile. The value is saved + /// without being encoded. + template <typename T> ErrorOr<T> readUnencodedNumber(); + + /// Read a string from the profile. + /// + /// If an error occurs during decoding, a diagnostic message is emitted and + /// the error is returned through the ErrorOr result. + /// + /// \returns the read value, or the error. + ErrorOr<StringRef> readString(); + + /// Read the string index and check whether it overflows the table. + template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table); + + /// Return true if we've reached the end of file (Data >= End). + bool at_eof() const { return Data >= End; } + + /// Read the next function profile instance. + std::error_code readFuncProfile(const uint8_t *Start); + + /// Read the contents of the given profile instance. + std::error_code readProfile(FunctionSamples &FProfile); + + /// Read the contents of Magic number and Version number. + std::error_code readMagicIdent(); + + /// Read profile summary. + std::error_code readSummary(); + + /// Read the whole name table.
+ virtual std::error_code readNameTable(); + + /// Points to the current location in the buffer. + const uint8_t *Data = nullptr; + + /// Points to the end of the buffer. + const uint8_t *End = nullptr; + + /// Function name table. It holds StringRefs, so the string data it refers + /// to must be owned elsewhere and outlive it. + std::vector<StringRef> NameTable; + + /// Read a string indirectly via the name table. + virtual ErrorOr<StringRef> readStringFromTable(); + /// NOTE(review): presumably the SampleContext counterpart of + /// readStringFromTable() -- confirm against the definition in the .cpp. + virtual ErrorOr<SampleContext> readSampleContextFromTable(); + +private: + std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries); + /// Format-specific hook taking the magic number read from the file; it is + /// pure virtual here and overridden by the concrete readers. + /// NOTE(review): presumably rejects a magic number that does not belong to + /// the format -- confirm against the definitions in the .cpp. + virtual std::error_code verifySPMagic(uint64_t Magic) = 0; +}; + +/// Reader for the raw binary format; Format defaults to SPF_Binary. +class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { +private: + virtual std::error_code verifySPMagic(uint64_t Magic) override; + +public: + SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_Binary) + : SampleProfileReaderBinary(std::move(B), C, Format) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); +}; + +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines +/// the basic structure of the extensible binary format. +/// The format is organized in sections except the magic and version number +/// at the beginning. There is a section table before all the sections, and +/// each entry in the table describes the entry type, start, size and +/// attributes. The format in each section is defined by the section itself. +/// +/// It is easy to add a new section while maintaining the backward +/// compatibility of the profile. Nothing extra needs to be done.
If we want +/// to extend an existing section, like add cache misses information in +/// addition to the sample count in the profile body, we can add a new section +/// with the extension and retire the existing section, and we could choose +/// to keep the parser of the old section if we want the reader to be able +/// to read both new and old format profiles. +/// +/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the +/// commonly used sections of a profile in extensible binary format. It is +/// possible to define other types of profile inherited from +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase. +class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { +private: + std::error_code decompressSection(const uint8_t *SecStart, + const uint64_t SecSize, + const uint8_t *&DecompressBuf, + uint64_t &DecompressBufSize); + + BumpPtrAllocator Allocator; + +protected: + /// In-memory section header table (entries of type SecHdrTableEntry). + std::vector<SecHdrTableEntry> SecHdrTable; + std::error_code readSecHdrTableEntry(uint32_t Idx); + std::error_code readSecHdrTable(); + + std::error_code readFuncMetadata(bool ProfileHasAttribute); + std::error_code readFuncMetadata(bool ProfileHasAttribute, + FunctionSamples *FProfile); + std::error_code readFuncOffsetTable(); + std::error_code readFuncProfiles(); + std::error_code readMD5NameTable(); + std::error_code readNameTableSec(bool IsMD5); + std::error_code readCSNameTableSec(); + std::error_code readProfileSymbolList(); + + virtual std::error_code readHeader() override; + virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + const SecHdrTableEntry &Entry); + // Placeholder for subclasses to dispatch their own section readers. It is + // pure virtual, so every concrete reader has to provide an implementation.
+ virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; + virtual ErrorOr<StringRef> readStringFromTable() override; + virtual ErrorOr<SampleContext> readSampleContextFromTable() override; + ErrorOr<SampleContextFrames> readContextFromTable(); + + std::unique_ptr<ProfileSymbolList> ProfSymList; + + /// The table mapping from function context to the offset of its + /// FunctionSamples relative to the start of the file. + DenseMap<SampleContext, uint64_t> FuncOffsetTable; + + /// Function offset mapping ordered by contexts. + std::unique_ptr<std::vector<std::pair<SampleContext, uint64_t>>> + OrderedFuncOffsets; + + /// The set containing the functions to use when compiling a module. + DenseSet<StringRef> FuncsToUse; + + /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't + /// need to be read in up front and can be directly accessed using index. + bool FixedLengthMD5 = false; + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + + /// If MD5 is used in NameTable section, the section saves uint64_t data. + /// The uint64_t data has to be converted to a string and then the string + /// will be used to initialize StringRef in NameTable. + /// Note NameTable contains StringRef so it needs another buffer to own + /// the string data. MD5StringBuf serves as the string buffer that is + /// referenced by NameTable (vector of StringRef). We make sure + /// the lifetime of MD5StringBuf is not shorter than that of NameTable. + /// useMD5() reports true exactly when this buffer has been allocated. + std::unique_ptr<std::vector<std::string>> MD5StringBuf; + + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients. + std::unique_ptr<const std::vector<SampleContextFrameVector>> CSNameTable; + + /// If SkipFlatProf is true, skip the sections with the + /// SecFlagFlat flag.
+ bool SkipFlatProf = false; + + bool FuncOffsetsOrdered = false; + +public: + SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B, + LLVMContext &C, SampleProfileFormat Format) + : SampleProfileReaderBinary(std::move(B), C, Format) {} + + /// Read sample profiles in extensible format from the associated file. + std::error_code readImpl() override; + + /// Get the total size of all \p Type sections. + uint64_t getSectionSize(SecType Type); + /// Get the total size of header and all sections. + uint64_t getFileSize(); + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override; + + /// Collect functions with definitions in Module M. Return true if + /// the reader has been given a module. + bool collectFuncsFromModule() override; + + /// Return whether names in the profile are all MD5 numbers. This is true + /// exactly when MD5StringBuf has been allocated. + virtual bool useMD5() override { return MD5StringBuf.get(); } + + /// Hand the profile symbol list over to the caller. Ownership is moved out + /// of ProfSymList, so ProfSymList is null afterwards. + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override { + return std::move(ProfSymList); + }; + + /// Record whether the sections with the SecFlagFlat flag should be skipped. + virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } +}; + +/// Reader for the extensible binary format; Format defaults to SPF_Ext_Binary. +/// Its readCustomSection() only advances Data to End and reports success. +class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { +private: + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code + readCustomSection(const SecHdrTableEntry &Entry) override { + // Update the data reader pointer to the end of the section. + Data = End; + return sampleprof_error::success; + }; + +public: + SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_Ext_Binary) + : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); +}; + +/// Reader for the compact binary format (SPF_Compact_Binary). +class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { +private: + /// Function name table. Note that this table of std::string is distinct + /// from, and hides, the StringRef NameTable of SampleProfileReaderBinary.
+ std::vector<std::string> NameTable; + /// The table mapping from function name to the offset of its FunctionSamples + /// relative to the start of the file. + DenseMap<StringRef, uint64_t> FuncOffsetTable; + /// The set containing the functions to use when compiling a module. + DenseSet<StringRef> FuncsToUse; + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code readNameTable() override; + /// Read a string indirectly via the name table. + virtual ErrorOr<StringRef> readStringFromTable() override; + virtual std::error_code readHeader() override; + std::error_code readFuncOffsetTable(); + +public: + SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B, + LLVMContext &C) + : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + + /// Read samples only for functions to use. + std::error_code readImpl() override; + + /// Collect functions with definitions in Module M. Return true if + /// the reader has been given a module. + bool collectFuncsFromModule() override; + + /// Return whether names in the profile are all MD5 numbers. Always true for + /// this reader. + virtual bool useMD5() override { return true; } +}; + +// Stack of FunctionSamples pointers; passed to +// SampleProfileReaderGCC::readOneFunctionProfile(). +using InlineCallStack = SmallVector<FunctionSamples *, 10>; + +// Supported histogram types in GCC. Currently, we only need support for +// call target histograms. +enum HistType { + HIST_TYPE_INTERVAL, + HIST_TYPE_POW2, + HIST_TYPE_SINGLE_VALUE, + HIST_TYPE_CONST_DELTA, + HIST_TYPE_INDIR_CALL, + HIST_TYPE_AVERAGE, + HIST_TYPE_IOR, + HIST_TYPE_INDIR_CALL_TOPN +}; + +/// Reader for profiles in the GCC (GCOV-based) format (SPF_GCC). +class SampleProfileReaderGCC : public SampleProfileReader { +public: + /// GcovBuffer is constructed over the memory buffer owned by the base class. + SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C) + : SampleProfileReader(std::move(B), C, SPF_GCC), + GcovBuffer(Buffer.get()) {} + + /// Read and validate the file header.
+ std::error_code readHeader() override; + + /// Read sample profiles from the associated file. + std::error_code readImpl() override; + + /// Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + +protected: + /// Parsing helpers used by this reader. + /// NOTE(review): their precise contracts are defined in the .cpp, which is + /// not visible here -- confirm there before relying on them. + std::error_code readNameTable(); + std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack, + bool Update, uint32_t Offset); + std::error_code readFunctionProfiles(); + std::error_code skipNextWord(); + template <typename T> ErrorOr<T> readNumber(); + ErrorOr<StringRef> readString(); + + /// Read the section tag and check that it's the same as \p Expected. + std::error_code readSectionTag(uint32_t Expected); + + /// GCOV buffer containing the profile. + GCOVBuffer GcovBuffer; + + /// Function names in this profile. + std::vector<std::string> Names; + + /// GCOV tags used to separate sections in the profile file. + static const uint32_t GCOVTagAFDOFileNames = 0xaa000000; + static const uint32_t GCOVTagAFDOFunction = 0xac000000; +}; + +} // end namespace sampleprof + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfWriter.h b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfWriter.h new file mode 100644 index 0000000000..6b2d4de98d --- /dev/null +++ b/contrib/libs/llvm14/include/llvm/ProfileData/SampleProfWriter.h @@ -0,0 +1,418 @@ +#pragma once + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//===- SampleProfWriter.h - Write LLVM sample profile data ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for writing sample profiles. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_PROFILEDATA_SAMPLEPROFWRITER_H +#define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <set> +#include <system_error> +#include <unordered_set> + +namespace llvm { +namespace sampleprof { + +enum SectionLayout { + DefaultLayout, + // The layout splits profile with context information from profile without + // context information. When Thinlto is enabled, ThinLTO postlink phase only + // has to load profile with context information and can skip the other part. + CtxSplitLayout, + NumOfLayout, +}; + +/// Sample-based profile writer. Base class. +class SampleProfileWriter { +public: + virtual ~SampleProfileWriter() = default; + + /// Write sample profiles in \p S. + /// + /// \returns status code of the file update operation. + virtual std::error_code writeSample(const FunctionSamples &S) = 0; + + /// Write all the sample profiles in the given map of samples. + /// + /// \returns status code of the file update operation. + virtual std::error_code write(const SampleProfileMap &ProfileMap); + + raw_ostream &getOutputStream() { return *OutputStream; } + + /// Profile writer factory. + /// + /// Create a new file writer based on the value of \p Format. 
+ static ErrorOr<std::unique_ptr<SampleProfileWriter>> + create(StringRef Filename, SampleProfileFormat Format); + + /// Create a new stream writer based on the value of \p Format. + /// For testing. + static ErrorOr<std::unique_ptr<SampleProfileWriter>> + create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format); + + virtual void setProfileSymbolList(ProfileSymbolList *PSL) {} + virtual void setToCompressAllSections() {} + virtual void setUseMD5() {} + virtual void setPartialProfile() {} + virtual void resetSecLayout(SectionLayout SL) {} + +protected: + SampleProfileWriter(std::unique_ptr<raw_ostream> &OS) + : OutputStream(std::move(OS)) {} + + /// Write a file header for the profile file. + virtual std::error_code writeHeader(const SampleProfileMap &ProfileMap) = 0; + + // Write function profiles to the profile file. + virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); + + /// Output stream where to emit the profile to. + std::unique_ptr<raw_ostream> OutputStream; + + /// Profile summary. + std::unique_ptr<ProfileSummary> Summary; + + /// Compute summary for this profile. + void computeSummary(const SampleProfileMap &ProfileMap); + + /// Profile format. + SampleProfileFormat Format = SPF_None; +}; + +/// Sample-based profile writer (text format). +class SampleProfileWriterText : public SampleProfileWriter { +public: + std::error_code writeSample(const FunctionSamples &S) override; + +protected: + SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS) + : SampleProfileWriter(OS), Indent(0) {} + + std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { + return sampleprof_error::success; + } + +private: + /// Indent level to use when writing. + /// + /// This is used when printing inlined callees. 
+ unsigned Indent; + + friend ErrorOr<std::unique_ptr<SampleProfileWriter>> + SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format); +}; + +/// Sample-based profile writer (binary format). +class SampleProfileWriterBinary : public SampleProfileWriter { +public: + SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS) + : SampleProfileWriter(OS) {} + + virtual std::error_code writeSample(const FunctionSamples &S) override; + +protected: + virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; } + virtual std::error_code writeMagicIdent(SampleProfileFormat Format); + virtual std::error_code writeNameTable(); + virtual std::error_code + writeHeader(const SampleProfileMap &ProfileMap) override; + std::error_code writeSummary(); + virtual std::error_code writeContextIdx(const SampleContext &Context); + std::error_code writeNameIdx(StringRef FName); + std::error_code writeBody(const FunctionSamples &S); + inline void stablizeNameTable(MapVector<StringRef, uint32_t> &NameTable, + std::set<StringRef> &V); + + MapVector<StringRef, uint32_t> NameTable; + + void addName(StringRef FName); + virtual void addContext(const SampleContext &Context); + void addNames(const FunctionSamples &S); + +private: + friend ErrorOr<std::unique_ptr<SampleProfileWriter>> + SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format); +}; + +class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; +}; + +const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout> + ExtBinaryHdrLayoutTable = { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // This is because sample reader follows the order in SectionHdrLayout + // to read each section. 
To read function profiles on demand, sample + // reader need to get the offset of each function profile first. + // + // DefaultLayout + SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0}, + {SecNameTable, 0, 0, 0, 0}, + {SecCSNameTable, 0, 0, 0, 0}, + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0, 0}, + {SecFuncMetadata, 0, 0, 0, 0}}), + // CtxSplitLayout + SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0}, + {SecNameTable, 0, 0, 0, 0}, + // profile with context + // for next two sections + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + // profile without context + // for next two sections + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0, 0}, + {SecFuncMetadata, 0, 0, 0, 0}}), +}; + +class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; +public: + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; + + virtual void setToCompressAllSections() override; + void setToCompressSection(SecType Type); + virtual std::error_code writeSample(const FunctionSamples &S) override; + + // Set to use MD5 to represent string in NameTable. + virtual void setUseMD5() override { + UseMD5 = true; + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name); + // MD5 will be stored as plain uint64_t instead of variable-length + // quantity format in NameTable section. + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5); + } + + // Set the profile to be partial. It means the profile is for + // common/shared code. The common profile is usually merged from + // profiles collected from running other targets. 
+ virtual void setPartialProfile() override { + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagPartial); + } + + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { + ProfSymList = PSL; + }; + + virtual void resetSecLayout(SectionLayout SL) override { + verifySecLayout(SL); +#ifndef NDEBUG + // Make sure resetSecLayout is called before any flag setting. + for (auto &Entry : SectionHdrLayout) { + assert(Entry.Flags == 0 && + "resetSecLayout has to be called before any flag setting"); + } +#endif + SecLayout = SL; + SectionHdrLayout = ExtBinaryHdrLayoutTable[SL]; + } + +protected: + uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx); + std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx, + uint64_t SectionStart); + template <class SecFlagType> + void addSectionFlag(SecType Type, SecFlagType Flag) { + for (auto &Entry : SectionHdrLayout) { + if (Entry.Type == Type) + addSecFlag(Entry, Flag); + } + } + template <class SecFlagType> + void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) { + addSecFlag(SectionHdrLayout[SectionIdx], Flag); + } + + virtual void addContext(const SampleContext &Context) override; + + // placeholder for subclasses to dispatch their own section writers. + virtual std::error_code writeCustomSection(SecType Type) = 0; + // Verify the SecLayout is supported by the format. + virtual void verifySecLayout(SectionLayout SL) = 0; + + // specify the order to write sections. + virtual std::error_code writeSections(const SampleProfileMap &ProfileMap) = 0; + + // Dispatch section writer for each section. \p LayoutIdx is the sequence + // number indicating where the section is located in SectionHdrLayout. + virtual std::error_code writeOneSection(SecType Type, uint32_t LayoutIdx, + const SampleProfileMap &ProfileMap); + + // Helper function to write name table. 
+ virtual std::error_code writeNameTable() override; + virtual std::error_code + writeContextIdx(const SampleContext &Context) override; + std::error_code writeCSNameIdx(const SampleContext &Context); + std::error_code writeCSNameTableSection(); + + std::error_code writeFuncMetadata(const SampleProfileMap &Profiles); + std::error_code writeFuncMetadata(const FunctionSamples &Profile); + + // Functions to write various kinds of sections. + std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap); + std::error_code writeFuncOffsetTable(); + std::error_code writeProfileSymbolListSection(); + + SectionLayout SecLayout = DefaultLayout; + // Specifiy the order of sections in section header table. Note + // the order of sections in SecHdrTable may be different that the + // order in SectionHdrLayout. sample Reader will follow the order + // in SectionHdrLayout to read each section. + SmallVector<SecHdrTableEntry, 8> SectionHdrLayout = + ExtBinaryHdrLayoutTable[DefaultLayout]; + + // Save the start of SecLBRProfile so we can compute the offset to the + // start of SecLBRProfile for each Function's Profile and will keep it + // in FuncOffsetTable. + uint64_t SecLBRProfileStart = 0; + +private: + void allocSecHdrTable(); + std::error_code writeSecHdrTable(); + virtual std::error_code + writeHeader(const SampleProfileMap &ProfileMap) override; + std::error_code compressAndOutput(); + + // We will swap the raw_ostream held by LocalBufStream and that + // held by OutputStream if we try to add a section which needs + // compression. After the swap, all the data written to output + // will be temporarily buffered into the underlying raw_string_ostream + // originally held by LocalBufStream. After the data writing for the + // section is completed, compress the data in the local buffer, + // swap the raw_ostream back and write the compressed data to the + // real output. 
+ std::unique_ptr<raw_ostream> LocalBufStream; + // The location where the output stream starts. + uint64_t FileStart; + // The location in the output stream where the SecHdrTable should be + // written to. + uint64_t SecHdrTableOffset; + // The table contains SecHdrTableEntry entries in order of how they are + // populated in the writer. It may be different from the order in + // SectionHdrLayout which specifies the sequence in which sections will + // be read. + std::vector<SecHdrTableEntry> SecHdrTable; + + // FuncOffsetTable maps function context to its profile offset in + // SecLBRProfile section. It is used to load function profile on demand. + MapVector<SampleContext, uint64_t> FuncOffsetTable; + // Whether to use MD5 to represent string. + bool UseMD5 = false; + + /// CSNameTable maps function context to its offset in SecCSNameTable section. + /// The offset will be used everywhere where the context is referenced. + MapVector<SampleContext, uint32_t> CSNameTable; + + ProfileSymbolList *ProfSymList = nullptr; +}; + +class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { +public: + SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS) + : SampleProfileWriterExtBinaryBase(OS) {} + +private: + std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap); + std::error_code writeCtxSplitLayout(const SampleProfileMap &ProfileMap); + + virtual std::error_code + writeSections(const SampleProfileMap &ProfileMap) override; + + virtual std::error_code writeCustomSection(SecType Type) override { + return sampleprof_error::success; + }; + + virtual void verifySecLayout(SectionLayout SL) override { + assert((SL == DefaultLayout || SL == CtxSplitLayout) && + "Unsupported layout"); + } +}; + +// CompactBinary is a compact format of binary profile which both reduces +// the profile size and the load time needed when compiling. It has two +// major difference with Binary format. +// 1. 
It represents all the strings in name table using md5 hash. +// 2. It saves a function offset table which maps function name index to +// the offset of its function profile to the start of the binary profile, +// so by using the function offset table, for those function profiles which +// will not be needed when compiling a module, the profile reader does't +// have to read them and it saves compile time if the profile size is huge. +// The layout of the compact format is shown as follows: +// +// Part1: Profile header, the same as binary format, containing magic +// number, version, summary, name table... +// Part2: Function Offset Table Offset, which saves the position of +// Part4. +// Part3: Function profile collection +// function1 profile start +// .... +// function2 profile start +// .... +// function3 profile start +// .... +// ...... +// Part4: Function Offset Table +// function1 name index --> function1 profile start +// function2 name index --> function2 profile start +// function3 name index --> function3 profile start +// +// We need Part2 because profile reader can use it to find out and read +// function offset table without reading Part3 first. +class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; + +public: + virtual std::error_code writeSample(const FunctionSamples &S) override; + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; + +protected: + /// The table mapping from function name to the offset of its FunctionSample + /// towards profile start. + MapVector<StringRef, uint64_t> FuncOffsetTable; + /// The offset of the slot to be filled with the offset of FuncOffsetTable + /// towards profile start. 
+ uint64_t TableOffset; + virtual std::error_code writeNameTable() override; + virtual std::error_code + writeHeader(const SampleProfileMap &ProfileMap) override; + std::error_code writeFuncOffsetTable(); +}; + +} // end namespace sampleprof +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROFWRITER_H + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif |