aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm12/lib/CodeGen/MachineFunctionSplitter.cpp
blob: fd5285c0e4ac7636d909e065b884a575a5da75dc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
// 
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
// 
//===----------------------------------------------------------------------===// 
// 
// \file 
// Uses profile information to split out cold blocks. 
// 
// This pass splits out cold machine basic blocks from the parent function. This 
// implementation leverages the basic block section framework. Blocks marked 
// cold by this pass are grouped together in a separate section prefixed with 
// ".text.unlikely.*". The linker can then group these together as a cold 
// section. The split part of the function is a contiguous region identified by 
// the symbol "foo.cold". Grouping all cold blocks across functions together 
// decreases fragmentation and improves icache and itlb utilization. Note that 
// the overall changes to the binary size are negligible; only a small number of 
// additional jump instructions may be introduced. 
// 
// For the original RFC of this pass please see 
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ 
//===----------------------------------------------------------------------===// 
 
#include "llvm/ADT/Statistic.h" 
#include "llvm/Analysis/ProfileSummaryInfo.h" 
#include "llvm/CodeGen/BasicBlockSectionUtils.h" 
#include "llvm/CodeGen/MachineBasicBlock.h" 
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 
#include "llvm/CodeGen/MachineFunction.h" 
#include "llvm/CodeGen/MachineFunctionPass.h" 
#include "llvm/CodeGen/MachineModuleInfo.h" 
#include "llvm/CodeGen/Passes.h" 
#include "llvm/IR/Function.h" 
#include "llvm/IR/Module.h" 
#include "llvm/InitializePasses.h" 
#include "llvm/Support/CommandLine.h" 
 
using namespace llvm; 
 
// Profile-summary percentile cutoff used to classify a block as cold.
//
// The value is a ProfileSummaryInfo integer percentile cutoff. The default of
// 999950 means that every block colder than the 99.995 percentile is split.
// That default was determined empirically to be optimal with respect to iTLB
// and icache metrics on Intel CPUs when considering cutoff values between
// 99%-ile and 100%-ile. A value of zero disables the percentile check.
//
// FIXME: This cutoff value is CPU dependent and should be moved to
// TargetTransformInfo once we consider enabling this on other platforms.
static cl::opt<unsigned> PercentileCutoff(
    "mfs-psi-cutoff",
    cl::desc("Percentile profile summary cutoff used to determine "
             "cold blocks. Unused if set to zero."),
    cl::init(999950), cl::Hidden);
 
// Absolute execution-count threshold. isColdBlock() only consults this value
// when the percentile cutoff above is zero; a block whose profile count is
// below the threshold is then treated as cold.
static cl::opt<unsigned>
    ColdCountThreshold("mfs-count-threshold",
                       cl::desc("Minimum number of times a block must be "
                                "executed to be retained."),
                       cl::init(1), cl::Hidden);
 
namespace {

/// Machine function pass that uses profile information to assign cold basic
/// blocks to the cold section and then reorders the function's blocks by
/// section.
class MachineFunctionSplitter : public MachineFunctionPass {
public:
  /// Pass identifier handed to the MachineFunctionPass base constructor.
  static char ID;

  /// Constructs the pass and runs its initializer against the global pass
  /// registry.
  MachineFunctionSplitter() : MachineFunctionPass(ID) {
    initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
  }

  /// Returns the human-readable name of this pass.
  StringRef getPassName() const override {
    return "Machine Function Splitter Transformation";
  }

  /// Declares the analyses this pass requires before it can run.
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Splits the cold blocks of \p MF out of the function.
  ///
  /// \returns false if the function was skipped (no profile data, an explicit
  /// section, or an "unlikely"/"unknown" section prefix), true otherwise.
  bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
 
/// Decides whether \p MBB should be treated as cold.
///
/// A block without a profile count is always cold. Otherwise, when the
/// percentile cutoff option is non-zero, the decision is delegated to
/// \p PSI; when it is zero, the block is cold if its count is below the
/// absolute count threshold option.
///
/// \param MBB  Block being classified.
/// \param MBFI Source of the block's profile count.
/// \param PSI  Profile summary used for the percentile-based check.
/// \returns true if the block is considered cold.
static bool isColdBlock(MachineBasicBlock &MBB,
                        const MachineBlockFrequencyInfo *MBFI,
                        ProfileSummaryInfo *PSI) {
  const Optional<uint64_t> ProfileCount = MBFI->getBlockProfileCount(&MBB);
  // No count recorded for this block: classify it as cold.
  if (!ProfileCount.hasValue())
    return true;

  const uint64_t ExecCount = ProfileCount.getValue();
  const bool UsePercentile = PercentileCutoff > 0;
  return UsePercentile
             ? PSI->isColdCountNthPercentile(PercentileCutoff, ExecCount)
             : ExecCount < ColdCountThreshold;
}
 
/// Marks the cold blocks of \p MF with the cold section ID and reorders the
/// function's blocks by section.
///
/// \returns false when the function is skipped, true once the function has
/// been renumbered and its blocks sorted.
bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
  const Function &Fn = MF.getFunction();

  // TODO: Only functions with profile data are targeted. Static information
  // may also be considered but we don't see performance improvements yet.
  if (!Fn.hasProfileData())
    return false;

  // TODO: Functions with a section attribute are not split, since the split
  // part may not be placed in a contiguous region. It may also be more
  // beneficial to augment the linker to ensure contiguous layout of split
  // functions within the same section as specified by the attribute.
  if (!Fn.getSection().empty())
    return false;

  // Do not proceed for cold functions or functions of unknown hotness.
  // Lukewarm functions have no prefix.
  const Optional<StringRef> Prefix = Fn.getSectionPrefix();
  if (Prefix.hasValue()) {
    const StringRef Name = Prefix.getValue();
    if (Name.equals("unlikely") || Name.equals("unknown"))
      return false;
  }

  // sortBasicBlocksAndUpdateBranches sorts blocks by their numeric
  // identifier, so renumbering first keeps the current block order. Keeping
  // that order is essential to retaining decisions made by prior passes such
  // as MachineBlockPlacement.
  MF.RenumberBlocks();
  MF.setBBSectionsType(BasicBlockSection::Preset);

  const MachineBlockFrequencyInfo &BlockFreqs =
      getAnalysis<MachineBlockFrequencyInfo>();
  ProfileSummaryInfo &Summary =
      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

  for (MachineBasicBlock &Block : MF) {
    // FIXME: The entry block is retained and all landing pad blocks are
    // conservatively kept as part of the original function. Once D73739 is
    // submitted, the handling of ehpads can be improved.
    const bool StaysInOriginal = Block.pred_empty() || Block.isEHPad();
    if (!StaysInOriginal && isColdBlock(Block, &BlockFreqs, &Summary))
      Block.setSectionID(MBBSectionID::ColdSectionID);
  }

  // Order blocks by the type of the section they were assigned to.
  llvm::sortBasicBlocksAndUpdateBranches(
      MF, [](const MachineBasicBlock &LHS, const MachineBasicBlock &RHS) {
        return LHS.getSectionID().Type < RHS.getSectionID().Type;
      });

  return true;
}
 
/// Registers the analyses this pass requires: machine module info, machine
/// block frequency info, and the profile summary.
void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
  // Chained so the three requirements are registered in the same order as
  // separate statements would register them.
  AU.addRequired<MachineModuleInfoWrapperPass>()
      .addRequired<MachineBlockFrequencyInfo>()
      .addRequired<ProfileSummaryInfoWrapperPass>();
}
 
// Definition of the static pass identifier that the constructor hands to the
// MachineFunctionPass base class.
char MachineFunctionSplitter::ID = 0;
// Pass registration: "machine-function-splitter" is the pass argument string
// and the second string is its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS, and the macro presumably
// provides the initializeMachineFunctionSplitterPass() called from the
// constructor -- confirm against the macro definition.
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
                "Split machine functions using profile information", false,
                false)
 
/// Factory for the machine function splitter pass.
///
/// \returns a newly heap-allocated MachineFunctionSplitter; the caller
/// receives the raw pointer.
MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
  MachineFunctionPass *Splitter = new MachineFunctionSplitter();
  return Splitter;
}