1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
|
//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file When allowed by the instruction, replace a dead definition of a GPR
/// with the zero register. This makes the code a bit friendlier towards the
/// hardware's register renamer.
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions"
namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
bool Changed;
void processMachineBasicBlock(MachineBasicBlock &MBB);
public:
static char ID; // Pass identification, replacement for typeid.
AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
initializeAArch64DeadRegisterDefinitionsPass(
*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &F) override;
StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char AArch64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
AARCH64_DEAD_REG_DEF_NAME, false, false)
static bool usesFrameIndex(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.uses())
if (MO.isFI())
return true;
return false;
}
// Instructions that lose their 'read' operation for a subesquent fence acquire
// (DMB LD) once the zero register is used.
//
// WARNING: The aquire variants of the instructions are also affected, but they
// are split out into `atomicBarrierDroppedOnZero()` to support annotations on
// assembly.
static bool atomicReadDroppedOnZero(unsigned Opcode) {
switch (Opcode) {
case AArch64::LDADDB: case AArch64::LDADDH:
case AArch64::LDADDW: case AArch64::LDADDX:
case AArch64::LDADDLB: case AArch64::LDADDLH:
case AArch64::LDADDLW: case AArch64::LDADDLX:
case AArch64::LDCLRB: case AArch64::LDCLRH:
case AArch64::LDCLRW: case AArch64::LDCLRX:
case AArch64::LDCLRLB: case AArch64::LDCLRLH:
case AArch64::LDCLRLW: case AArch64::LDCLRLX:
case AArch64::LDEORB: case AArch64::LDEORH:
case AArch64::LDEORW: case AArch64::LDEORX:
case AArch64::LDEORLB: case AArch64::LDEORLH:
case AArch64::LDEORLW: case AArch64::LDEORLX:
case AArch64::LDSETB: case AArch64::LDSETH:
case AArch64::LDSETW: case AArch64::LDSETX:
case AArch64::LDSETLB: case AArch64::LDSETLH:
case AArch64::LDSETLW: case AArch64::LDSETLX:
case AArch64::LDSMAXB: case AArch64::LDSMAXH:
case AArch64::LDSMAXW: case AArch64::LDSMAXX:
case AArch64::LDSMAXLB: case AArch64::LDSMAXLH:
case AArch64::LDSMAXLW: case AArch64::LDSMAXLX:
case AArch64::LDSMINB: case AArch64::LDSMINH:
case AArch64::LDSMINW: case AArch64::LDSMINX:
case AArch64::LDSMINLB: case AArch64::LDSMINLH:
case AArch64::LDSMINLW: case AArch64::LDSMINLX:
case AArch64::LDUMAXB: case AArch64::LDUMAXH:
case AArch64::LDUMAXW: case AArch64::LDUMAXX:
case AArch64::LDUMAXLB: case AArch64::LDUMAXLH:
case AArch64::LDUMAXLW: case AArch64::LDUMAXLX:
case AArch64::LDUMINB: case AArch64::LDUMINH:
case AArch64::LDUMINW: case AArch64::LDUMINX:
case AArch64::LDUMINLB: case AArch64::LDUMINLH:
case AArch64::LDUMINLW: case AArch64::LDUMINLX:
return true;
}
return false;
}
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
for (MachineInstr &MI : MBB) {
if (usesFrameIndex(MI)) {
// We need to skip this instruction because while it appears to have a
// dead def it uses a frame index which might expand into a multi
// instruction sequence during EPI.
LLVM_DEBUG(dbgs() << " Ignoring, operand is frame index\n");
continue;
}
if (MI.definesRegister(AArch64::XZR) || MI.definesRegister(AArch64::WZR)) {
// It is not allowed to write to the same register (not even the zero
// register) twice in a single instruction.
LLVM_DEBUG(
dbgs()
<< " Ignoring, XZR or WZR already used by the instruction\n");
continue;
}
if (atomicBarrierDroppedOnZero(MI.getOpcode()) || atomicReadDroppedOnZero(MI.getOpcode())) {
LLVM_DEBUG(dbgs() << " Ignoring, semantics change with xzr/wzr.\n");
continue;
}
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef())
continue;
// We should not have any relevant physreg defs that are replacable by
// zero before register allocation. So we just check for dead vreg defs.
Register Reg = MO.getReg();
if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
continue;
assert(!MO.isImplicit() && "Unexpected implicit def!");
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
// Be careful not to change the register if it's a tied operand.
if (MI.isRegTiedToUseOperand(I)) {
LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
unsigned NewReg;
if (RC == nullptr) {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
} else if (RC->contains(AArch64::WZR))
NewReg = AArch64::WZR;
else if (RC->contains(AArch64::XZR))
NewReg = AArch64::XZR;
else {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
}
LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ");
MO.setReg(NewReg);
MO.setIsDead();
LLVM_DEBUG(MI.print(dbgs()));
++NumDeadDefsReplaced;
Changed = true;
// Only replace one dead register, see check for zero register above.
break;
}
}
}
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
LLVM_DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
Changed = false;
for (auto &MBB : MF)
processMachineBasicBlock(MBB);
return Changed;
}
FunctionPass *llvm::createAArch64DeadRegisterDefinitions() {
return new AArch64DeadRegisterDefinitions();
}
|