author    shadchin <shadchin@yandex-team.ru>  2022-02-10 16:44:30 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:30 +0300
commit    2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree      012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
parent    6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download  ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp')
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  2062
1 file changed, 1031 insertions, 1031 deletions
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fc5ef02e84..72f92065f3 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,7 +18,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -34,18 +34,18 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/Pass.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
-using namespace MIPatternMatch;
+using namespace MIPatternMatch;
namespace {
@@ -103,23 +103,23 @@ private:
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- ///@{
- /// Helper functions for selectCompareBranch.
- bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
- MachineIRBuilder &MIB) const;
- bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
+ ///@{
+ /// Helper functions for selectCompareBranch.
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
+ MachineIRBuilder &MIB) const;
+ bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
- ///@}
-
+ ///@}
+
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
@@ -160,7 +160,7 @@ private:
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -173,72 +173,72 @@ private:
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
- MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a floating point comparison between \p LHS and \p RHS.
- /// \p Pred if given is the intended predicate to use.
- MachineInstr *emitFPCompare(Register LHS, Register RHS,
- MachineIRBuilder &MIRBuilder,
- Optional<CmpInst::Predicate> = None) const;
-
- MachineInstr *emitInstr(unsigned Opcode,
- std::initializer_list<llvm::DstOp> DstOps,
- std::initializer_list<llvm::SrcOp> SrcOps,
- MachineIRBuilder &MIRBuilder,
- const ComplexRendererFns &RenderFns = None) const;
- /// Helper function to emit an add or sub instruction.
- ///
- /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
- /// in a specific order.
- ///
- /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
- ///
- /// \code
- /// const std::array<std::array<unsigned, 2>, 4> Table {
- /// {{AArch64::ADDXri, AArch64::ADDWri},
- /// {AArch64::ADDXrs, AArch64::ADDWrs},
- /// {AArch64::ADDXrr, AArch64::ADDWrr},
- /// {AArch64::SUBXri, AArch64::SUBWri},
- /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
- /// \endcode
- ///
- /// Each row in the table corresponds to a different addressing mode. Each
- /// column corresponds to a different register size.
- ///
- /// \attention Rows must be structured as follows:
- /// - Row 0: The ri opcode variants
- /// - Row 1: The rs opcode variants
- /// - Row 2: The rr opcode variants
- /// - Row 3: The ri opcode variants for negative immediates
- /// - Row 4: The rx opcode variants
- ///
- /// \attention Columns must be structured as follows:
- /// - Column 0: The 64-bit opcode variants
- /// - Column 1: The 32-bit opcode variants
- ///
- /// \p Dst is the destination register of the binop to emit.
- /// \p LHS is the left-hand operand of the binop to emit.
- /// \p RHS is the right-hand operand of the binop to emit.
- MachineInstr *emitAddSub(
- const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
- Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
- MachineOperand &RHS,
+ MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a floating point comparison between \p LHS and \p RHS.
+ /// \p Pred if given is the intended predicate to use.
+ MachineInstr *emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> = None) const;
+
+ MachineInstr *emitInstr(unsigned Opcode,
+ std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps,
+ MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns = None) const;
+ /// Helper function to emit an add or sub instruction.
+ ///
+ /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+ /// in a specific order.
+ ///
+ /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
+ ///
+ /// \code
+ /// const std::array<std::array<unsigned, 2>, 4> Table {
+ /// {{AArch64::ADDXri, AArch64::ADDWri},
+ /// {AArch64::ADDXrs, AArch64::ADDWrs},
+ /// {AArch64::ADDXrr, AArch64::ADDWrr},
+ /// {AArch64::SUBXri, AArch64::SUBWri},
+ /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ /// \endcode
+ ///
+ /// Each row in the table corresponds to a different addressing mode. Each
+ /// column corresponds to a different register size.
+ ///
+ /// \attention Rows must be structured as follows:
+ /// - Row 0: The ri opcode variants
+ /// - Row 1: The rs opcode variants
+ /// - Row 2: The rr opcode variants
+ /// - Row 3: The ri opcode variants for negative immediates
+ /// - Row 4: The rx opcode variants
+ ///
+ /// \attention Columns must be structured as follows:
+ /// - Column 0: The 64-bit opcode variants
+ /// - Column 1: The 32-bit opcode variants
+ ///
+ /// \p Dst is the destination register of the binop to emit.
+ /// \p LHS is the left-hand operand of the binop to emit.
+ /// \p RHS is the right-hand operand of the binop to emit.
+ MachineInstr *emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
+ MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
+ MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
- AArch64CC::CondCode CC,
- MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
+ AArch64CC::CondCode CC,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
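
For reference, the emitAddSub documentation quoted above boils down to a two-dimensional table lookup: the row picks the addressing-mode variant and the column picks the register width. A minimal standalone sketch of that lookup, with placeholder numbers instead of the real AArch64 opcode constants:

// Illustrative sketch only; enum names and table entries are placeholders,
// not the actual AArch64 opcodes used by emitAddSub.
#include <array>
#include <cstdio>

enum AddrMode { RI = 0, RS = 1, RR = 2, RI_NEG = 3, RX = 4 };

// Row: addressing mode. Column 0: 64-bit variant, column 1: 32-bit variant.
constexpr std::array<std::array<unsigned, 2>, 5> AddSubTable{{
    {1001, 2001}, // ri
    {1002, 2002}, // rs
    {1003, 2003}, // rr
    {1004, 2004}, // ri, negative immediate
    {1005, 2005}, // rx
}};

unsigned pickOpcode(AddrMode Mode, bool Is32Bit) {
  return AddSubTable[Mode][Is32Bit];
}

int main() {
  // A 32-bit add of two plain registers would use the "rr" row.
  std::printf("opcode id: %u\n", pickOpcode(RR, /*Is32Bit=*/true));
}
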
@@ -250,25 +250,25 @@ private:
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg is expected to be a 32-bit scalar register.
+ /// Emit a CSet for an integer compare.
+ ///
+ /// \p DefReg is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
- /// Emit a CSet for a FP compare.
- ///
- /// \p Dst is expected to be a 32-bit scalar register.
- MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
- MachineIRBuilder &MIRBuilder) const;
-
- /// Emit the overflow op for \p Opcode.
- ///
- /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
- /// G_USUBO, etc.
- std::pair<MachineInstr *, AArch64CC::CondCode>
- emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
- MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
-
+ /// Emit a CSet for a FP compare.
+ ///
+ /// \p Dst is expected to be a 32-bit scalar register.
+ MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit the overflow op for \p Opcode.
+ ///
+ /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+ /// G_USUBO, etc.
+ std::pair<MachineInstr *, AArch64CC::CondCode>
+ emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
@@ -276,11 +276,11 @@ private:
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
- /// Emit a CB(N)Z instruction which branches to \p DestMBB.
- MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const;
-
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -577,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
- Immed = ValAndVReg->Value.getSExtValue();
+ Immed = ValAndVReg->Value.getSExtValue();
} else
return None;
return Immed;
@@ -865,7 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
- (void)KnownValid;
+ (void)KnownValid;
#endif
return ValidCopy;
};
@@ -1012,173 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
-MachineInstr *
-AArch64InstructionSelector::emitSelect(Register Dst, Register True,
- Register False, AArch64CC::CondCode CC,
- MachineIRBuilder &MIB) const {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
- RBI.getRegBank(True, MRI, TRI)->getID() &&
- "Expected both select operands to have the same regbank?");
- LLT Ty = MRI.getType(True);
- if (Ty.isVector())
- return nullptr;
- const unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) &&
- "Expected 32 bit or 64 bit select only?");
- const bool Is32Bit = Size == 32;
- if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
- unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
- auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
- return &*FCSel;
- }
-
- // By default, we'll try and emit a CSEL.
- unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
- bool Optimized = false;
- auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
- &Optimized](Register &Reg, Register &OtherReg,
- bool Invert) {
- if (Optimized)
- return false;
-
- // Attempt to fold:
- //
- // %sub = G_SUB 0, %x
- // %select = G_SELECT cc, %reg, %sub
- //
- // Into:
- // %select = CSNEG %reg, %x, cc
- Register MatchReg;
- if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
- // Attempt to fold:
- //
- // %xor = G_XOR %x, -1
- // %select = G_SELECT cc, %reg, %xor
- //
- // Into:
- // %select = CSINV %reg, %x, cc
- if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
- // Attempt to fold:
- //
- // %add = G_ADD %x, 1
- // %select = G_SELECT cc, %reg, %add
- //
- // Into:
- // %select = CSINC %reg, %x, cc
- if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
+MachineInstr *
+AArch64InstructionSelector::emitSelect(Register Dst, Register True,
+ Register False, AArch64CC::CondCode CC,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
+ RBI.getRegBank(True, MRI, TRI)->getID() &&
+ "Expected both select operands to have the same regbank?");
+ LLT Ty = MRI.getType(True);
+ if (Ty.isVector())
+ return nullptr;
+ const unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "Expected 32 bit or 64 bit select only?");
+ const bool Is32Bit = Size == 32;
+ if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
+ unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
+ auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
+ return &*FCSel;
+ }
+
+ // By default, we'll try and emit a CSEL.
+ unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
+ bool Optimized = false;
+ auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
+ &Optimized](Register &Reg, Register &OtherReg,
+ bool Invert) {
+ if (Optimized)
+ return false;
+
+ // Attempt to fold:
+ //
+ // %sub = G_SUB 0, %x
+ // %select = G_SELECT cc, %reg, %sub
+ //
+ // Into:
+ // %select = CSNEG %reg, %x, cc
+ Register MatchReg;
+ if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %xor = G_XOR %x, -1
+ // %select = G_SELECT cc, %reg, %xor
+ //
+ // Into:
+ // %select = CSINV %reg, %x, cc
+ if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %add = G_ADD %x, 1
+ // %select = G_SELECT cc, %reg, %add
+ //
+ // Into:
+ // %select = CSINC %reg, %x, cc
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
return false;
- };
-
- // Helper lambda which tries to use CSINC/CSINV for the instruction when its
- // true/false values are constants.
- // FIXME: All of these patterns already exist in tablegen. We should be
- // able to import these.
- auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
- &Optimized]() {
- if (Optimized)
- return false;
- auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
- if (!TrueCst && !FalseCst)
- return false;
-
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
- if (TrueCst && FalseCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- int64_t F = FalseCst->Value.getSExtValue();
-
- if (T == 0 && F == 1) {
- // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
-
- if (T == 0 && F == -1) {
- // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
- }
-
- if (TrueCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- if (T == 1) {
- // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
-
- if (T == -1) {
- // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
- }
-
- if (FalseCst) {
- int64_t F = FalseCst->Value.getSExtValue();
- if (F == 1) {
- // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- False = ZReg;
- return true;
- }
-
- if (F == -1) {
- // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- False = ZReg;
- return true;
- }
- }
- return false;
- };
-
- Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
- Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
- Optimized |= TryOptSelectCst();
- auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
- return &*SelectInst;
+ };
+
+ // Helper lambda which tries to use CSINC/CSINV for the instruction when its
+ // true/false values are constants.
+ // FIXME: All of these patterns already exist in tablegen. We should be
+ // able to import these.
+ auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
+ &Optimized]() {
+ if (Optimized)
+ return false;
+ auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ if (!TrueCst && !FalseCst)
+ return false;
+
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+ if (TrueCst && FalseCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ int64_t F = FalseCst->Value.getSExtValue();
+
+ if (T == 0 && F == 1) {
+ // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+
+ if (T == 0 && F == -1) {
+ // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+ }
+
+ if (TrueCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ if (T == 1) {
+ // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+
+ if (T == -1) {
+ // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+ }
+
+ if (FalseCst) {
+ int64_t F = FalseCst->Value.getSExtValue();
+ if (F == 1) {
+ // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ False = ZReg;
+ return true;
+ }
+
+ if (F == -1) {
+ // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ False = ZReg;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
+ Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
+ Optimized |= TryOptSelectCst();
+ auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
+ return &*SelectInst;
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
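
The constant folds in emitSelect above (G_SELECT of 0/1 or 1/-1 style operands becoming CSINC/CSINV, possibly with an inverted condition) can be summarized as a small classification step. A standalone sketch, assuming a simplified Kind/invert result instead of the in-place opcode rewriting the real code does:

// Illustrative sketch only; mirrors the decision order of TryOptSelectCst.
#include <cstdint>
#include <cstdio>
#include <optional>

enum class Kind { CSEL, CSINC, CSINV };

struct Choice {
  Kind K;
  bool InvertCond; // whether the inverted condition code is used
};

Choice classify(std::optional<int64_t> TrueCst, std::optional<int64_t> FalseCst) {
  if (TrueCst && FalseCst) {
    if (*TrueCst == 0 && *FalseCst == 1)
      return {Kind::CSINC, false}; // csinc zr, zr, cc
    if (*TrueCst == 0 && *FalseCst == -1)
      return {Kind::CSINV, false}; // csinv zr, zr, cc
  }
  if (TrueCst && *TrueCst == 1)
    return {Kind::CSINC, true};    // csinc f, zr, inv_cc
  if (TrueCst && *TrueCst == -1)
    return {Kind::CSINV, true};    // csinv f, zr, inv_cc
  if (FalseCst && *FalseCst == 1)
    return {Kind::CSINC, false};   // csinc t, zr, cc
  if (FalseCst && *FalseCst == -1)
    return {Kind::CSINV, false};   // csinv t, zr, cc
  return {Kind::CSEL, false};      // fall back to a plain CSEL
}

int main() {
  Choice C = classify(/*TrueCst=*/1, /*FalseCst=*/std::nullopt);
  std::printf("kind=%d invert=%d\n", static_cast<int>(C.K), C.InvertCond);
}
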
@@ -1308,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
+ C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1318,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
auto VRegAndVal =
getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
+ C = VRegAndVal->Value.getSExtValue();
break;
}
}
@@ -1420,9 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
}
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
- MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const {
- assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
+ MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const {
+ assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
// Given something like this:
//
// %x = ...Something...
@@ -1444,92 +1444,92 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getConstantVRegValWithLookThrough(
- AndInst.getOperand(2).getReg(), *MIB.getMRI());
- if (!MaybeBit)
+ auto MaybeBit = getConstantVRegValWithLookThrough(
+ AndInst.getOperand(2).getReg(), *MIB.getMRI());
+ if (!MaybeBit)
return false;
- int32_t Bit = MaybeBit->Value.exactLogBase2();
- if (Bit < 0)
- return false;
-
- Register TestReg = AndInst.getOperand(1).getReg();
+ int32_t Bit = MaybeBit->Value.exactLogBase2();
+ if (Bit < 0)
+ return false;
+ Register TestReg = AndInst.getOperand(1).getReg();
+
// Emit a TB(N)Z.
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
return true;
}
-MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
- bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const {
- assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
- AArch64::GPRRegBankID &&
- "Expected GPRs only?");
- auto Ty = MRI.getType(CompareReg);
- unsigned Width = Ty.getSizeInBits();
- assert(!Ty.isVector() && "Expected scalar only?");
- assert(Width <= 64 && "Expected width to be at most 64?");
- static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
- {AArch64::CBNZW, AArch64::CBNZX}};
- unsigned Opc = OpcTable[IsNegative][Width == 64];
- auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
- constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
- return &*BranchMI;
-}
-
-bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
- MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
- assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
- // totally clean. Some of them require two branches to implement.
- auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
- emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
- Pred);
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
+ MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
+ assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
+ // totally clean. Some of them require two branches to implement.
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
+ emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
+ Pred);
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
- if (CC2 != AArch64CC::AL)
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
-}
-
-bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
- //
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (!ProduceNonFlagSettingCondBr)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+ if (CC2 != AArch64CC::AL)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
+ //
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (!ProduceNonFlagSettingCondBr)
return false;
- MachineRegisterInfo &MRI = *MIB.getMRI();
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto Pred =
- static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
- Register LHS = ICmp.getOperand(2).getReg();
- Register RHS = ICmp.getOperand(3).getReg();
-
- // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto Pred =
+ static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
+ Register LHS = ICmp.getOperand(2).getReg();
+ Register RHS = ICmp.getOperand(3).getReg();
+
+ // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
//
// Handle non-commutative condition codes first.
// Note that we don't want to do this when we have a G_AND because it can
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
- if (VRegAndVal && !AndInst) {
- int64_t C = VRegAndVal->Value.getSExtValue();
+ if (VRegAndVal && !AndInst) {
+ int64_t C = VRegAndVal->Value.getSExtValue();
// When we have a greater-than comparison, we can just test if the msb is
// zero.
@@ -1550,97 +1550,97 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
}
}
- // Attempt to handle commutative condition codes. Right now, that's only
- // eq/ne.
- if (ICmpInst::isEquality(Pred)) {
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
- }
-
- if (VRegAndVal && VRegAndVal->Value == 0) {
- // If there's a G_AND feeding into this branch, try to fold it away by
- // emitting a TB(N)Z instead.
- //
- // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
- // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
- // would be redundant.
- if (AndInst &&
- tryOptAndIntoCompareBranch(
- *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
- I.eraseFromParent();
- return true;
- }
-
- // Otherwise, try to emit a CB(N)Z instead.
- auto LHSTy = MRI.getType(LHS);
- if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
- emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- }
- }
-
- return false;
-}
-
-bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ }
+
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ //
+ // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
+ // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
+ // would be redundant.
+ if (AndInst &&
+ tryOptAndIntoCompareBranch(
+ *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
return true;
-
- // Couldn't optimize. Emit a compare + a Bcc.
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto PredOp = ICmp.getOperand(1);
- emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
-}
-
-bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- Register CondReg = I.getOperand(0).getReg();
- MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
- CondReg = CCMI->getOperand(1).getReg();
- CCMI = MRI.getVRegDef(CondReg);
- }
-
- // Try to select the G_BRCOND using whatever is feeding the condition if
- // possible.
- MachineIRBuilder MIB(I);
- unsigned CCMIOpc = CCMI->getOpcode();
- if (CCMIOpc == TargetOpcode::G_FCMP)
- return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
- if (CCMIOpc == TargetOpcode::G_ICMP)
- return selectCompareBranchFedByICmp(I, *CCMI, MIB);
-
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (ProduceNonFlagSettingCondBr) {
- emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
- I.getOperand(1).getMBB(), MIB);
+
+ // Couldn't optimize. Emit a compare + a Bcc.
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto PredOp = ICmp.getOperand(1);
+ emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ Register CondReg = I.getOperand(0).getReg();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ CondReg = CCMI->getOperand(1).getReg();
+ CCMI = MRI.getVRegDef(CondReg);
+ }
+
+ // Try to select the G_BRCOND using whatever is feeding the condition if
+ // possible.
+ MachineIRBuilder MIB(I);
+ unsigned CCMIOpc = CCMI->getOpcode();
+ if (CCMIOpc == TargetOpcode::G_FCMP)
+ return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
+ if (CCMIOpc == TargetOpcode::G_ICMP)
+ return selectCompareBranchFedByICmp(I, *CCMI, MIB);
+
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (ProduceNonFlagSettingCondBr) {
+ emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
+ I.getOperand(1).getMBB(), MIB);
I.eraseFromParent();
return true;
}
- // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
- auto TstMI =
- MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- auto Bcc = MIB.buildInstr(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
- .addMBB(I.getOperand(1).getMBB());
+ // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
+ auto TstMI =
+ MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ auto Bcc = MIB.buildInstr(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Returns the element immediate value of a vector shift operand if found.
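
The opcode choice inside emitCBZ above is another small two-dimensional lookup: one axis picks CBZ versus CBNZ, the other picks the 32- or 64-bit form. A standalone sketch, with string names standing in for the real opcode constants:

// Illustrative sketch only.
#include <cstdio>

const char *pickCompareBranch(bool IsNegative, unsigned WidthInBits) {
  static const char *const OpcTable[2][2] = {{"CBZW", "CBZX"},
                                             {"CBNZW", "CBNZX"}};
  return OpcTable[IsNegative][WidthInBits == 64];
}

int main() {
  // A "branch if not zero" on a 64-bit register selects CBNZX.
  std::printf("%s\n", pickCompareBranch(/*IsNegative=*/true, 64));
}
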
@@ -1661,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
return None;
if (Idx == 1)
- ImmVal = VRegAndVal->Value.getSExtValue();
- if (ImmVal != VRegAndVal->Value.getSExtValue())
+ ImmVal = VRegAndVal->Value.getSExtValue();
+ if (ImmVal != VRegAndVal->Value.getSExtValue())
return None;
}
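
The check in getVectorShiftImm above only accepts a vector shift amount as an immediate when every lane of the build-vector holds the same constant. A standalone sketch of that splat check, using plain integers for the per-lane constants:

// Illustrative sketch only.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

std::optional<int64_t> getSplatShiftImm(const std::vector<int64_t> &Lanes) {
  std::optional<int64_t> ImmVal;
  for (int64_t Lane : Lanes) {
    if (!ImmVal)
      ImmVal = Lane;          // remember the first lane's value
    else if (*ImmVal != Lane) // every other lane must match it
      return std::nullopt;
  }
  return ImmVal;
}

int main() {
  auto Imm = getSplatShiftImm({3, 3, 3, 3});
  std::printf("%s\n", Imm ? "splat immediate" : "not a splat");
}
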
@@ -1725,14 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL(
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
- Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
- Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
- Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
- } else if (Ty == LLT::vector(8, 8)) {
- Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
@@ -1749,10 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL(
return true;
}
-bool AArch64InstructionSelector::selectVectorAshrLshr(
+bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_ASHR ||
- I.getOpcode() == TargetOpcode::G_LSHR);
+ assert(I.getOpcode() == TargetOpcode::G_ASHR ||
+ I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
@@ -1761,40 +1761,40 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
if (!Ty.isVector())
return false;
- bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
-
- // We expect the immediate case to be lowered in the PostLegalCombiner to
- // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
-
+ bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
+
+ // We expect the immediate case to be lowered in the PostLegalCombiner to
+ // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
+
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
- const TargetRegisterClass *RC =
- getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ const TargetRegisterClass *RC =
+ getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::vector(2, 64)) {
- Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
+ Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
+ Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
+ Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
- Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
- NegOpc = AArch64::NEGv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
- Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
- NegOpc = AArch64::NEGv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
- Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv16i8;
- } else if (Ty == LLT::vector(8, 8)) {
- Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
- NegOpc = AArch64::NEGv8i8;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
+ NegOpc = AArch64::NEGv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
+ NegOpc = AArch64::NEGv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
+ NegOpc = AArch64::NEGv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+ NegOpc = AArch64::NEGv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
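
The comment in selectVectorAshrLshr above notes that AArch64 has no register-shift-right vector instruction: the register form of the left shift treats a negative per-lane amount as a right shift, so the selector negates the shift amount. A standalone scalar sketch of that semantic, modeling one lane only:

// Illustrative sketch only; saturation and lane handling of the real
// SSHL/USHL instructions are not modeled here.
#include <cstdint>
#include <cstdio>

// Signed variant (one lane of SSHL): left shift for non-negative amounts,
// arithmetic right shift for negative amounts.
int32_t sshlLane(int32_t Val, int8_t Amount) {
  return Amount >= 0 ? Val << Amount : Val >> (-Amount);
}

// Unsigned variant (one lane of USHL): logical right shift instead.
uint32_t ushlLane(uint32_t Val, int8_t Amount) {
  return Amount >= 0 ? Val << Amount : Val >> (-Amount);
}

int main() {
  // A right shift by 3 is emitted as a register shift by NEG(3).
  std::printf("%d %u\n", sshlLane(-64, -3), ushlLane(64u, -3));
}
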
@@ -1931,40 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
- case AArch64::G_DUP: {
- // Convert the type from p0 to s64 to help selection.
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (!DstTy.getElementType().isPointer())
- return false;
- MachineIRBuilder MIB(I);
- auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
- MRI.setType(I.getOperand(0).getReg(),
- DstTy.changeElementType(LLT::scalar(64)));
- MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- I.getOperand(1).setReg(NewSrc.getReg(0));
- return true;
- }
- case TargetOpcode::G_UITOFP:
- case TargetOpcode::G_SITOFP: {
- // If both source and destination regbanks are FPR, then convert the opcode
- // to G_SITOF so that the importer can select it to an fpr variant.
- // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
- // copy.
- Register SrcReg = I.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
- return false;
-
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
- if (I.getOpcode() == TargetOpcode::G_SITOFP)
- I.setDesc(TII.get(AArch64::G_SITOF));
- else
- I.setDesc(TII.get(AArch64::G_UITOF));
- return true;
- }
- return false;
- }
+ case AArch64::G_DUP: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (!DstTy.getElementType().isPointer())
+ return false;
+ MachineIRBuilder MIB(I);
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(1).setReg(NewSrc.getReg(0));
+ return true;
+ }
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_SITOFP: {
+ // If both source and destination regbanks are FPR, then convert the opcode
+ // to G_SITOF so that the importer can select it to an fpr variant.
+ // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
+ // copy.
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+ if (I.getOpcode() == TargetOpcode::G_SITOFP)
+ I.setDesc(TII.get(AArch64::G_SITOF));
+ else
+ I.setDesc(TII.get(AArch64::G_UITOF));
+ return true;
+ }
+ return false;
+ }
default:
return false;
}
@@ -2005,14 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
}
-
- // Also take the opportunity here to try to do some optimization.
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
- Register NegatedReg;
- if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
- return true;
- I.getOperand(2).setReg(NegatedReg);
- I.setDesc(TII.get(TargetOpcode::G_SUB));
+
+ // Also take the opportunity here to try to do some optimization.
+ // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
+ Register NegatedReg;
+ if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
+ return true;
+ I.getOperand(2).setReg(NegatedReg);
+ I.setDesc(TII.get(TargetOpcode::G_SUB));
return true;
}
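
The tail of convertPtrAddToAdd above rewrites the pointer add into a G_SUB when the offset is the 0-x negate idiom. A standalone sketch of that rewrite, with plain integers standing in for virtual registers:

// Illustrative sketch only.
#include <cstdint>
#include <cstdio>

struct Offset {
  bool IsNegOfX; // the offset was produced as 0 - X
  int64_t X;     // the negated value (or the offset itself when not negated)
};

// base + (0 - X)  ==>  base - X
int64_t lowerPtrAdd(int64_t Base, const Offset &Off) {
  return Off.IsNegOfX ? Base - Off.X : Base + Off.X;
}

int main() {
  Offset Off{/*IsNegOfX=*/true, /*X=*/16};
  std::printf("%lld\n", static_cast<long long>(lowerPtrAdd(0x1000, Off)));
}
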
@@ -2102,17 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
- case TargetOpcode::G_BR: {
- // If the branch jumps to the fallthrough block, don't bother emitting it.
- // Only do this for -O0 for a good code size improvement, because when
- // optimizations are enabled we want to leave this choice to
- // MachineBlockPlacement.
- bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
- if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
- return false;
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_BR: {
+ // If the branch jumps to the fallthrough block, don't bother emitting it.
+ // Only do this for -O0 for a good code size improvement, because when
+ // optimizations are enabled we want to leave this choice to
+ // MachineBlockPlacement.
+ bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
+ if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -2232,8 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineIRBuilder MIB(I);
switch (Opcode) {
- case TargetOpcode::G_BRCOND:
- return selectCompareBranch(I, MF, MRI);
+ case TargetOpcode::G_BRCOND:
+ return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
@@ -2313,7 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- const LLT s128 = LLT::scalar(128);
+ const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
@@ -2323,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64 && Ty != s128) {
+ if (Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s32 << " or " << s64
- << " or " << s128 << '\n');
+ << " or " << s128 << '\n');
return false;
}
@@ -2339,9 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
- // scenario in the code here.
- if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
+ // 0.0 is not covered by tablegen for FP128. So we will handle this
+ // scenario in the code here.
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
@@ -2368,17 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Either emit a FMOV, or emit a copy to emit a normal mov.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass
- : (DefSize == 64 ? AArch64::FPR64RegClass
- : AArch64::FPR128RegClass);
+ const TargetRegisterClass &FPRRC =
+ DefSize == 32 ? AArch64::FPR32RegClass
+ : (DefSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass);
// Can we use a FMOV instruction to represent the immediate?
if (emitFMovForFConstant(I, MRI))
return true;
// For 64b values, emit a constant pool load instead.
- if (DefSize == 64 || DefSize == 128) {
+ if (DefSize == 64 || DefSize == 128) {
auto *FPImm = I.getOperand(1).getFPImm();
MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
@@ -2571,21 +2571,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto &MemOp = **I.memoperands_begin();
- uint64_t MemSizeInBytes = MemOp.getSize();
+ uint64_t MemSizeInBytes = MemOp.getSize();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemSizeInBytes == 1) {
+ MemSizeInBytes == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
- unsigned MemSizeInBits = MemSizeInBytes * 8;
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
-#ifndef NDEBUG
+#ifndef NDEBUG
const Register PtrReg = I.getOperand(1).getReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
@@ -2598,78 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- // Helper lambda for partially selecting I. Either returns the original
- // instruction with an updated opcode, or a new instruction.
- auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return nullptr;
- // Check if we can fold anything into the addressing mode.
- auto AddrModeFns =
- selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
- if (!AddrModeFns) {
- // Can't fold anything. Use the original instruction.
- I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
- return &I;
+ // Helper lambda for partially selecting I. Either returns the original
+ // instruction with an updated opcode, or a new instruction.
+ auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+ bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+ if (NewOpc == I.getOpcode())
+ return nullptr;
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns =
+ selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+ if (!AddrModeFns) {
+ // Can't fold anything. Use the original instruction.
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return &I;
}
- // Folded something. Create a new instruction and return it.
- auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
- NewInst.cloneMemRefs(I);
- for (auto &Fn : *AddrModeFns)
- Fn(NewInst);
- I.eraseFromParent();
- return &*NewInst;
- };
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+ IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(I);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ I.eraseFromParent();
+ return &*NewInst;
+ };
- MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
- if (!LoadStore)
- return false;
+ MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+ if (!LoadStore)
+ return false;
// If we're storing a 0, use WZR/XZR.
- if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
- /*HandleFConstants = */ false);
- if (CVal && CVal->Value == 0) {
- switch (LoadStore->getOpcode()) {
- case AArch64::STRWui:
- case AArch64::STRHHui:
- case AArch64::STRBBui:
- LoadStore->getOperand(0).setReg(AArch64::WZR);
- break;
- case AArch64::STRXui:
- LoadStore->getOperand(0).setReg(AArch64::XZR);
- break;
- }
+ if (Opcode == TargetOpcode::G_STORE) {
+ auto CVal = getConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
+ /*HandleFConstants = */ false);
+ if (CVal && CVal->Value == 0) {
+ switch (LoadStore->getOpcode()) {
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ LoadStore->getOperand(0).setReg(AArch64::WZR);
+ break;
+ case AArch64::STRXui:
+ LoadStore->getOperand(0).setReg(AArch64::XZR);
+ break;
+ }
}
}
if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the
- // importer.
- if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
+ // The zextload from a smaller type to i32 should be handled by the
+ // importer.
+ if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
- // and zero_extend with SUBREG_TO_REG.
+ // and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = LoadStore->getOperand(0).getReg();
- LoadStore->getOperand(0).setReg(LdReg);
+ Register DstReg = LoadStore->getOperand(0).getReg();
+ LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
+ MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
- return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
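
The G_STORE handling above replaces a stored constant zero with the WZR/XZR zero register, keyed on the already-selected store opcode. A standalone sketch of that peephole, with opcode and register names as plain strings standing in for the real constants:

// Illustrative sketch only.
#include <cstdio>
#include <cstring>

// Returns the zero register that can replace the stored value when it is
// known to be 0, or nullptr if the opcode is not one we rewrite.
const char *zeroRegForStore(const char *Opc) {
  if (!std::strcmp(Opc, "STRWui") || !std::strcmp(Opc, "STRHHui") ||
      !std::strcmp(Opc, "STRBBui"))
    return "WZR"; // 32-bit and narrower stores use the 32-bit zero register
  if (!std::strcmp(Opc, "STRXui"))
    return "XZR"; // 64-bit stores use the 64-bit zero register
  return nullptr;
}

int main() {
  const char *R = zeroRegForStore("STRXui");
  std::printf("%s\n", R ? R : "no rewrite");
}
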
@@ -2700,21 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorAshrLshr(I, MRI);
+ return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -2743,24 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_USUBO: {
- // Emit the operation and get the correct condition code.
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_USUBO: {
+ // Emit the operation and get the correct condition code.
MachineIRBuilder MIRBuilder(I);
- auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIRBuilder);
+ auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+ I.getOperand(2), I.getOperand(3), MIRBuilder);
// Now, put the overflow result in the register given by the first operand
- // to the overflow op. CSINC increments the result when the predicate is
- // false, so to get the increment when it's true, we need to use the
- // inverse. In this case, we want to increment when carry is set.
- Register ZReg = AArch64::WZR;
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
auto CsetMI = MIRBuilder
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
+ {ZReg, ZReg})
+ .addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
@@ -2768,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -3059,15 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (tryOptSelect(I))
return true;
- // Make sure to use an unused vreg instead of wzr, so that the peephole
- // optimizations will be able to optimize these.
- MachineIRBuilder MIB(I);
- Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
- return false;
+ // Make sure to use an unused vreg instead of wzr, so that the peephole
+ // optimizations will be able to optimize these.
+ MachineIRBuilder MIB(I);
+ Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+ return false;
I.eraseFromParent();
return true;
}
@@ -3082,21 +3082,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
MachineIRBuilder MIRBuilder(I);
- auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIRBuilder);
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+ MIRBuilder);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
- MachineIRBuilder MIRBuilder(I);
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
- MIRBuilder, Pred) ||
- !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
+ MachineIRBuilder MIRBuilder(I);
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
+ MIRBuilder, Pred) ||
+ !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
return false;
I.eraseFromParent();
return true;
@@ -3136,24 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
- case AArch64::G_DUP: {
- // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
- // imported patterns. Do it manually here. Avoiding generating s16 gpr is
- // difficult because at RBS we may end up pessimizing the fpr case if we
- // decided to add an anyextend to fix this. Manual selection is the most
- // robust solution for now.
- Register SrcReg = I.getOperand(1).getReg();
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
- return false; // We expect the fpr regbank case to be imported.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.getSizeInBits() == 16)
- I.setDesc(TII.get(AArch64::DUPv8i16gpr));
- else if (SrcTy.getSizeInBits() == 8)
- I.setDesc(TII.get(AArch64::DUPv16i8gpr));
- else
- return false;
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ case AArch64::G_DUP: {
+ // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
+ // imported patterns. Do it manually here. Avoiding generating s16 gpr is
+ // difficult because at RBS we may end up pessimizing the fpr case if we
+ // decided to add an anyextend to fix this. Manual selection is the most
+ // robust solution for now.
+ Register SrcReg = I.getOperand(1).getReg();
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ return false; // We expect the fpr regbank case to be imported.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.getSizeInBits() == 16)
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
+ else if (SrcTy.getSizeInBits() == 8)
+ I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else
+ return false;
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -3174,52 +3174,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
- case TargetOpcode::G_VECREDUCE_FADD:
- case TargetOpcode::G_VECREDUCE_ADD:
- return selectReduction(I, MRI);
- }
-
- return false;
-}
-
-bool AArch64InstructionSelector::selectReduction(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- Register VecReg = I.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::vector(16, 8))
- Opc = AArch64::ADDVv16i8v;
- else if (VecTy == LLT::vector(8, 16))
- Opc = AArch64::ADDVv8i16v;
- else if (VecTy == LLT::vector(4, 32))
- Opc = AArch64::ADDVv4i32v;
- else if (VecTy == LLT::vector(2, 64))
- Opc = AArch64::ADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ return selectReduction(I, MRI);
}
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::vector(2, 32))
- Opc = AArch64::FADDPv2i32p;
- else if (VecTy == LLT::vector(2, 64))
- Opc = AArch64::FADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
return false;
}
+bool AArch64InstructionSelector::selectReduction(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ Register VecReg = I.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(16, 8))
+ Opc = AArch64::ADDVv16i8v;
+ else if (VecTy == LLT::vector(8, 16))
+ Opc = AArch64::ADDVv8i16v;
+ else if (VecTy == LLT::vector(4, 32))
+ Opc = AArch64::ADDVv4i32v;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::ADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(2, 32))
+ Opc = AArch64::FADDPv2i32p;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::FADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+ return false;
+}
+
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -3230,8 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
-
- MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
+
+ MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
@@ -3268,20 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
const GlobalValue &GV = *I.getOperand(1).getGlobal();
MachineIRBuilder MIB(I);
- auto LoadGOT =
- MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
- .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+ auto LoadGOT =
+ MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
+ .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
- {LoadGOT.getReg(0)})
+ {LoadGOT.getReg(0)})
.addImm(0);
- MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
+ MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
- .addUse(AArch64::X0, RegState::Implicit)
+ .addUse(AArch64::X0, RegState::Implicit)
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
@@ -3767,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt(
(void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
- assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
+ assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
// Need the lane index to determine the correct copy opcode.
MachineOperand &LaneIdxOp = I.getOperand(2);
@@ -3782,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
MachineIRBuilder MIRBuilder(I);
@@ -4005,10 +4005,10 @@ static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
- if (EltSize == 16) {
- Opc = AArch64::INSvi16gpr;
- SubregIdx = AArch64::ssub;
- } else if (EltSize == 32) {
+ if (EltSize == 16) {
+ Opc = AArch64::INSvi16gpr;
+ SubregIdx = AArch64::ssub;
+ } else if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
@@ -4037,93 +4037,93 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
return std::make_pair(Opc, SubregIdx);
}
-MachineInstr *AArch64InstructionSelector::emitInstr(
- unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
- std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
- const ComplexRendererFns &RenderFns) const {
- assert(Opcode && "Expected an opcode?");
- assert(!isPreISelGenericOpcode(Opcode) &&
- "Function should only be used to produce selected instructions!");
- auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
- if (RenderFns)
- for (auto &Fn : *RenderFns)
- Fn(MI);
- constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
- return &*MI;
-}
-
-MachineInstr *AArch64InstructionSelector::emitAddSub(
- const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
- Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected a scalar or pointer?");
- unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
- bool Is32Bit = Size == 32;
-
- // INSTRri form with positive arithmetic immediate.
- if (auto Fns = selectArithImmed(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRri form with negative arithmetic immediate.
- if (auto Fns = selectNegArithImmed(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRrx form.
- if (auto Fns = selectArithExtendedRegister(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRrs form.
- if (auto Fns = selectShiftedRegister(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
- return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
- MIRBuilder);
-}
-
+MachineInstr *AArch64InstructionSelector::emitInstr(
+ unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns) const {
+ assert(Opcode && "Expected an opcode?");
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Function should only be used to produce selected instructions!");
+ auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
+ if (RenderFns)
+ for (auto &Fn : *RenderFns)
+ Fn(MI);
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+ return &*MI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected a scalar or pointer?");
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+ bool Is32Bit = Size == 32;
+
+ // INSTRri form with positive arithmetic immediate.
+ if (auto Fns = selectArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRri form with negative arithmetic immediate.
+ if (auto Fns = selectNegArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrx form.
+ if (auto Fns = selectArithExtendedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrs form.
+ if (auto Fns = selectShiftedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+ return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
+ MIRBuilder);
+}
+
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::ADDXri, AArch64::ADDWri},
- {AArch64::ADDXrs, AArch64::ADDWrs},
- {AArch64::ADDXrr, AArch64::ADDWrr},
- {AArch64::SUBXri, AArch64::SUBWri},
- {AArch64::ADDXrx, AArch64::ADDWrx}}};
- return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::ADDSXri, AArch64::ADDSWri},
- {AArch64::ADDSXrs, AArch64::ADDSWrs},
- {AArch64::ADDSXrr, AArch64::ADDSWrr},
- {AArch64::SUBSXri, AArch64::SUBSWri},
- {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
- return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::SUBSXri, AArch64::SUBSWri},
- {AArch64::SUBSXrs, AArch64::SUBSWrs},
- {AArch64::SUBSXrr, AArch64::SUBSWrr},
- {AArch64::ADDSXri, AArch64::ADDSWri},
- {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
- return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDXri, AArch64::ADDWri},
+ {AArch64::ADDXrs, AArch64::ADDWrs},
+ {AArch64::ADDXrr, AArch64::ADDWrr},
+ {AArch64::SUBXri, AArch64::SUBWri},
+ {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::ADDSXrs, AArch64::ADDSWrs},
+ {AArch64::ADDSXrr, AArch64::ADDSWrr},
+ {AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::SUBSXrs, AArch64::SUBSWrs},
+ {AArch64::SUBSXrr, AArch64::SUBSWrr},
+ {AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
@@ -4131,129 +4131,129 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
- auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
- return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
+ auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+ return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
-AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
+AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- LLT Ty = MRI.getType(LHS.getReg());
- unsigned RegSize = Ty.getSizeInBits();
+ LLT Ty = MRI.getType(LHS.getReg());
+ unsigned RegSize = Ty.getSizeInBits();
bool Is32Bit = (RegSize == 32);
- const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
- {AArch64::ANDSXrs, AArch64::ANDSWrs},
- {AArch64::ANDSXrr, AArch64::ANDSWrr}};
- // ANDS needs a logical immediate for its immediate form. Check if we can
- // fold one in.
- if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
- int64_t Imm = ValAndVReg->Value.getSExtValue();
-
- if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
- auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
- TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- return &*TstMI;
- }
- }
-
- if (auto Fns = selectLogicalShiftedRegister(RHS))
- return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
- return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
+ const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
+ {AArch64::ANDSXrs, AArch64::ANDSWrs},
+ {AArch64::ANDSXrr, AArch64::ANDSWrr}};
+ // ANDS needs a logical immediate for its immediate form. Check if we can
+ // fold one in.
+ if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ int64_t Imm = ValAndVReg->Value.getSExtValue();
+
+ if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
+ auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
+ TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ return &*TstMI;
+ }
+ }
+
+ if (auto Fns = selectLogicalShiftedRegister(RHS))
+ return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
+ return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
-MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- LLT CmpTy = MRI.getType(LHS.getReg());
- assert(!CmpTy.isVector() && "Expected scalar or pointer");
- unsigned Size = CmpTy.getSizeInBits();
- (void)Size;
- assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
- // Fold the compare into a cmn or tst if possible.
- if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
- return FoldCmp;
- auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
- return emitSUBS(Dst, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
- Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-#ifndef NDEBUG
- LLT Ty = MRI.getType(Dst);
- assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
- "Expected a 32-bit scalar register?");
-#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(Pred, CC1, CC2);
- if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
- const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
- Register Def1Reg = MRI.createVirtualRegister(RC);
- Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
- auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
- constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
- return &*OrMI;
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
- MachineIRBuilder &MIRBuilder,
- Optional<CmpInst::Predicate> Pred) const {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- LLT Ty = MRI.getType(LHS);
- if (Ty.isVector())
- return nullptr;
- unsigned OpSize = Ty.getSizeInBits();
- if (OpSize != 32 && OpSize != 64)
- return nullptr;
-
- // If this is a compare against +0.0, then we don't have
- // to explicitly materialize a constant.
- const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
- bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
-
- auto IsEqualityPred = [](CmpInst::Predicate P) {
- return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
- P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
- };
- if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
- // Try commutating the operands.
- const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
- if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
- ShouldUseImm = true;
- std::swap(LHS, RHS);
- }
- }
- unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
- {AArch64::FCMPSri, AArch64::FCMPDri}};
- unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
-
- // Partially build the compare. Decide if we need to add a use for the
- // third operand based off whether or not we're comparing against 0.0.
- auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
- if (!ShouldUseImm)
- CmpMI.addUse(RHS);
+ LLT CmpTy = MRI.getType(LHS.getReg());
+ assert(!CmpTy.isVector() && "Expected scalar or pointer");
+ unsigned Size = CmpTy.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
+ // Fold the compare into a cmn or tst if possible.
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
+ return FoldCmp;
+ auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
+ return emitSUBS(Dst, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
+ Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+#ifndef NDEBUG
+ LLT Ty = MRI.getType(Dst);
+ assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
+ "Expected a 32-bit scalar register?");
+#endif
+ const Register ZeroReg = AArch64::WZR;
+ auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
+ auto CSet =
+ MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
+ .addImm(getInvertedCondCode(CC));
+ constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
+ return &*CSet;
+ };
+
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ if (CC2 == AArch64CC::AL)
+ return EmitCSet(Dst, CC1);
+
+ const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
+ Register Def1Reg = MRI.createVirtualRegister(RC);
+ Register Def2Reg = MRI.createVirtualRegister(RC);
+ EmitCSet(Def1Reg, CC1);
+ EmitCSet(Def2Reg, CC2);
+ auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
+ constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
+ return &*OrMI;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> Pred) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ LLT Ty = MRI.getType(LHS);
+ if (Ty.isVector())
+ return nullptr;
+ unsigned OpSize = Ty.getSizeInBits();
+ if (OpSize != 32 && OpSize != 64)
+ return nullptr;
+
+ // If this is a compare against +0.0, then we don't have
+ // to explicitly materialize a constant.
+ const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
+ bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+
+ auto IsEqualityPred = [](CmpInst::Predicate P) {
+ return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
+ P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
+ };
+ if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
+ // Try commutating the operands.
+ const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
+ if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
+ ShouldUseImm = true;
+ std::swap(LHS, RHS);
+ }
+ }
+ unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+ {AArch64::FCMPSri, AArch64::FCMPDri}};
+ unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
+
+ // Partially build the compare. Decide if we need to add a use for the
+ // third operand based off whether or not we're comparing against 0.0.
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+ if (!ShouldUseImm)
+ CmpMI.addUse(RHS);
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
+ return &*CmpMI;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -4363,25 +4363,25 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
return &*I;
}
-std::pair<MachineInstr *, AArch64CC::CondCode>
-AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
- MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- switch (Opcode) {
- default:
- llvm_unreachable("Unexpected opcode!");
- case TargetOpcode::G_SADDO:
- return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
- case TargetOpcode::G_UADDO:
- return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
- case TargetOpcode::G_SSUBO:
- return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
- case TargetOpcode::G_USUBO:
- return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
- }
-}
-
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+ MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ }
+}
+
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -4441,17 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
- auto Pred =
- static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
CondCode = changeICMPPredToAArch64CC(Pred);
- emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
- CondDef->getOperand(1), MIB);
+ emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB);
} else {
// Get the condition code for the select.
- auto Pred =
- static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
AArch64CC::CondCode CondCode2;
- changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
+ changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
// instructions to emit the comparison.
@@ -4460,16 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
if (CondCode2 != AArch64CC::AL)
return false;
- if (!emitFPCompare(CondDef->getOperand(2).getReg(),
- CondDef->getOperand(3).getReg(), MIB)) {
- LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
+ if (!emitFPCompare(CondDef->getOperand(2).getReg(),
+ CondDef->getOperand(3).getReg(), MIB)) {
+ LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
- }
+ }
}
// Emit the select.
- emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
- I.getOperand(3).getReg(), CondCode, MIB);
+ emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
+ I.getOperand(3).getReg(), CondCode, MIB);
I.eraseFromParent();
return true;
}
@@ -4552,15 +4552,15 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this if the compare is signed:
//
// tst x, y
- if (!CmpInst::isUnsigned(P) && LHSDef &&
+ if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
- return emitTST(LHSDef->getOperand(1),
- LHSDef->getOperand(2), MIRBuilder);
+ return emitTST(LHSDef->getOperand(1),
+ LHSDef->getOperand(2), MIRBuilder);
}
return nullptr;
@@ -4708,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
@@ -4765,9 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt(
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- unsigned DstSize = DstTy.getSizeInBits();
- assert(DstSize <= 128 && "Unexpected build_vec type!");
- if (DstSize < 32)
+ unsigned DstSize = DstTy.getSizeInBits();
+ assert(DstSize <= 128 && "Unexpected build_vec type!");
+ if (DstSize < 32)
return false;
// Check if we're building a constant vector, in which case we want to
// generate a constant pool load instead of a vector insert sequence.
@@ -4788,24 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
- if (CV->isNullValue()) {
- // Until the importer can support immAllZerosV in pattern leaf nodes,
- // select a zero move manually here.
- Register DstReg = I.getOperand(0).getReg();
- if (DstSize == 128) {
- auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- } else if (DstSize == 64) {
- auto Mov =
- MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(Mov.getReg(0), 0, AArch64::dsub);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
- }
- }
+ if (CV->isNullValue()) {
+ // Until the importer can support immAllZerosV in pattern leaf nodes,
+ // select a zero move manually here.
+ Register DstReg = I.getOperand(0).getReg();
+ if (DstSize == 128) {
+ auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ } else if (DstSize == 64) {
+ auto Mov =
+ MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
+ }
+ }
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
@@ -4927,10 +4927,10 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
case Intrinsic::debugtrap:
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
- case Intrinsic::ubsantrap:
- MIRBuilder.buildInstr(AArch64::BRK, {}, {})
- .addImm(I.getOperand(1).getImm() | ('U' << 8));
- break;
+ case Intrinsic::ubsantrap:
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {})
+ .addImm(I.getOperand(1).getImm() | ('U' << 8));
+ break;
}
I.eraseFromParent();
@@ -4996,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
- if (!MFReturnAddr) {
- // Insert the copy from LR/X30 into the entry block, before it can be
- // clobbered by anything.
- MFI.setReturnAddressIsTaken(true);
- MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
- AArch64::GPR64RegClass);
- }
-
- if (STI.hasPAuth()) {
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
- } else {
- MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ if (!MFReturnAddr) {
+ // Insert the copy from LR/X30 into the entry block, before it can be
+ // clobbered by anything.
+ MFI.setReturnAddressIsTaken(true);
+ MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
+ AArch64::GPR64RegClass);
}
-
+
+ if (STI.hasPAuth()) {
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
+ } else {
+ MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
+
I.eraseFromParent();
return true;
}
@@ -5031,16 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
-
- if (STI.hasPAuth()) {
- Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
- } else {
- MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
- }
+
+ if (STI.hasPAuth()) {
+ Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
+ } else {
+ MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
}
I.eraseFromParent();
@@ -5248,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
- int64_t ImmVal = ValAndVReg->Value.getSExtValue();
+ int64_t ImmVal = ValAndVReg->Value.getSExtValue();
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
@@ -5388,60 +5388,60 @@ InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- if (!Root.isReg())
- return None;
- MachineInstr *PtrAdd =
- getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd)
+ if (!Root.isReg())
return None;
-
-  // Check for immediates which cannot be encoded in the [base + imm]
- // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
- // end up with code like:
- //
- // mov x0, wide
- // add x1 base, x0
- // ldr x2, [x1, x0]
- //
- // In this situation, we can use the [base, xreg] addressing mode to save an
- // add/sub:
- //
- // mov x0, wide
- // ldr x2, [base, x0]
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
- if (ValAndVReg) {
- unsigned Scale = Log2_32(SizeInBytes);
- int64_t ImmOff = ValAndVReg->Value.getSExtValue();
-
-    // Skip immediates that can be selected in the load/store addressing
- // mode.
- if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
- ImmOff < (0x1000 << Scale))
- return None;
-
- // Helper lambda to decide whether or not it is preferable to emit an add.
- auto isPreferredADD = [](int64_t ImmOff) {
- // Constants in [0x0, 0xfff] can be encoded in an add.
- if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
- return true;
-
- // Can it be encoded in an add lsl #12?
- if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
- return false;
-
- // It can be encoded in an add lsl #12, but we may not want to. If it is
- // possible to select this as a single movz, then prefer that. A single
- // movz is faster than an add with a shift.
- return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
- (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
- };
-
- // If the immediate can be encoded in a single add/sub, then bail out.
- if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
- return None;
- }
-
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd)
+ return None;
+
+  // Check for immediates which cannot be encoded in the [base + imm]
+ // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
+ // end up with code like:
+ //
+ // mov x0, wide
+ // add x1 base, x0
+ // ldr x2, [x1, x0]
+ //
+ // In this situation, we can use the [base, xreg] addressing mode to save an
+ // add/sub:
+ //
+ // mov x0, wide
+ // ldr x2, [base, x0]
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ if (ValAndVReg) {
+ unsigned Scale = Log2_32(SizeInBytes);
+ int64_t ImmOff = ValAndVReg->Value.getSExtValue();
+
+    // Skip immediates that can be selected in the load/store addressing
+ // mode.
+ if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+ ImmOff < (0x1000 << Scale))
+ return None;
+
+ // Helper lambda to decide whether or not it is preferable to emit an add.
+ auto isPreferredADD = [](int64_t ImmOff) {
+ // Constants in [0x0, 0xfff] can be encoded in an add.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+
+ // Can it be encoded in an add lsl #12?
+ if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+ return false;
+
+ // It can be encoded in an add lsl #12, but we may not want to. If it is
+ // possible to select this as a single movz, then prefer that. A single
+ // movz is faster than an add with a shift.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ };
+
+ // If the immediate can be encoded in a single add/sub, then bail out.
+ if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return None;
+ }
+
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
@@ -5871,8 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- Optional<int64_t> CstVal =
- getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
+ Optional<int64_t> CstVal =
+ getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}