| field | value | date |
|---|---|---|
| author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
| tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64/GISel | |
| parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
| download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64/GISel')
12 files changed, 2496 insertions(+), 2496 deletions(-)
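Among the restored files is AArch64GlobalISelUtils.h, whose entire content is the `isLegalArithImmed` helper that appears further down in the diff. As a reading aid, here is a minimal standalone sketch of that predicate; the test harness and sample values are illustrative additions and not part of the commit. The predicate accepts a plain 12-bit immediate, or a 12-bit immediate shifted left by 12 bits.

```cpp
// Standalone sketch of the isLegalArithImmed predicate restored in
// AArch64GlobalISelUtils.h. The surrounding harness and the sample
// values below are illustrative only.
#include <cstdint>
#include <cstdio>

constexpr bool isLegalArithImmed(const uint64_t C) {
  // Legal if C fits in 12 bits, or its low 12 bits are zero and the
  // remaining value fits in the next 12 bits (a 12-bit immediate << 12).
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

static_assert(isLegalArithImmed(0xFFF), "plain 12-bit immediate");
static_assert(isLegalArithImmed(0x123000), "12-bit immediate shifted by 12");
static_assert(!isLegalArithImmed(0x1234), "needs more than 12 unshifted bits");
static_assert(!isLegalArithImmed(0x1234000), "shifted value wider than 12 bits");

int main() {
  for (uint64_t C : {0xFFFULL, 0x1000ULL, 0x1001ULL, 0xFFF000ULL, 0x1000000ULL})
    std::printf("0x%llx -> %s\n", (unsigned long long)C,
                isLegalArithImmed(C) ? "legal" : "not legal");
  return 0;
}
```

The diff itself follows.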
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 0f8b1d6584..7b05f70a73 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -52,10 +52,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) : CallLowering(&TLI) {} namespace { -struct IncomingArgHandler : public CallLowering::IncomingValueHandler { +struct IncomingArgHandler : public CallLowering::IncomingValueHandler { IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, CCAssignFn *AssignFn) - : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} + : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -101,7 +101,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { /// How the physical register gets marked varies between formal /// parameters (it's a basic-block live-in), and a call instruction /// (it's an implicit-def of the BL). - virtual void markPhysRegUsed(MCRegister PhysReg) = 0; + virtual void markPhysRegUsed(MCRegister PhysReg) = 0; uint64_t StackUsed; }; @@ -111,7 +111,7 @@ struct FormalArgHandler : public IncomingArgHandler { CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } @@ -122,19 +122,19 @@ struct CallReturnHandler : public IncomingArgHandler { MachineInstrBuilder MIB, CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); } MachineInstrBuilder MIB; }; -struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { +struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg, bool IsTailCall = false, int FPDiff = 0) - : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), + : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), StackSize(0), SPReg(0) {} @@ -187,8 +187,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { if (!Arg.IsFixed) MaxSize = 0; - assert(Arg.Regs.size() == 1); - + assert(Arg.Regs.size() == 1); + Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt ? extendRegister(Arg.Regs[0], VA, MaxSize) : Arg.Regs[0]; @@ -274,7 +274,7 @@ void AArch64CallLowering::splitToValueTypes( bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI, + FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const { auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && @@ -420,7 +420,7 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, // Conservatively forward X8, since it might be used for an aggregate // return. 
if (!CCInfo.isAllocated(AArch64::X8)) { - Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); + Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); } @@ -441,7 +441,7 @@ bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const { bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { + ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { MachineFunction &MF = MIRBuilder.getMF(); MachineBasicBlock &MBB = MIRBuilder.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -623,25 +623,25 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Info.IsVarArg) { - // Be conservative and disallow variadic memory operands to match SDAG's - // behaviour. - // FIXME: If the caller's calling convention is C, then we can - // potentially use its argument area. However, for cases like fastcc, - // we can't do anything. - for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; - if (ArgLoc.isRegLoc()) - continue; + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + if (ArgLoc.isRegLoc()) + continue; LLVM_DEBUG( dbgs() - << "... Cannot tail call vararg function with stack arguments\n"); + << "... Cannot tail call vararg function with stack arguments\n"); return false; } } - return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); + return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); } bool AArch64CallLowering::isEligibleForTailCallOptimization( @@ -756,7 +756,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use // x16 or x17. - if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) + if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) return AArch64::TCRETURNriBTI; return AArch64::TCRETURNri; @@ -776,7 +776,7 @@ bool AArch64CallLowering::lowerTailCall( // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 // register class. Until we can do that, we should fall back here. - if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { + if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { LLVM_DEBUG( dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); return false; @@ -894,9 +894,9 @@ bool AArch64CallLowering::lowerTailCall( // If Callee is a reg, since it is used by a target specific instruction, // it must have a register class matching the constraint of that instruction. 
if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); MF.getFrameInfo().setHasTailCall(); Info.LoweredTailCall = true; @@ -978,9 +978,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // instruction, it must have a register class matching the // constraint of that instruction. if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h index 1f45c9ebc0..8054cf6b99 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -34,14 +34,14 @@ public: AArch64CallLowering(const AArch64TargetLowering &TLI); bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, + ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const override; bool fallBackToDAGISel(const Function &F) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const override; + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h index bed1136c7a..9536f0a596 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h @@ -1,29 +1,29 @@ -//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file APIs for AArch64-specific helper functions used in the GlobalISel -/// pipeline. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H -#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H - -#include <cstdint> - -namespace llvm { -namespace AArch64GISelUtils { - -/// \returns true if \p C is a legal immediate operand for an arithmetic -/// instruction. 
-constexpr bool isLegalArithImmed(const uint64_t C) { - return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); -} - -} // namespace AArch64GISelUtils -} // namespace llvm - -#endif +//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file APIs for AArch64-specific helper functions used in the GlobalISel +/// pipeline. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H +#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H + +#include <cstdint> + +namespace llvm { +namespace AArch64GISelUtils { + +/// \returns true if \p C is a legal immediate operand for an arithmetic +/// instruction. +constexpr bool isLegalArithImmed(const uint64_t C) { + return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); +} + +} // namespace AArch64GISelUtils +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index fc5ef02e84..72f92065f3 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -18,7 +18,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" @@ -34,18 +34,18 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/Pass.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "aarch64-isel" using namespace llvm; -using namespace MIPatternMatch; +using namespace MIPatternMatch; namespace { @@ -103,23 +103,23 @@ private: bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - ///@{ - /// Helper functions for selectCompareBranch. - bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, - MachineIRBuilder &MIB) const; - bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, + ///@{ + /// Helper functions for selectCompareBranch. 
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, + MachineIRBuilder &MIB) const; + bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - ///@} - + ///@} + bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; // Helper to generate an equivalent of scalar_to_vector into a new register, @@ -160,7 +160,7 @@ private: bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; - bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(const Constant *CPVal, MachineFunction &MF) const; @@ -173,72 +173,72 @@ private: MachineIRBuilder &MIRBuilder) const; // Emit an integer compare between LHS and RHS, which checks for Predicate. - MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; - - /// Emit a floating point comparison between \p LHS and \p RHS. - /// \p Pred if given is the intended predicate to use. - MachineInstr *emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> = None) const; - - MachineInstr *emitInstr(unsigned Opcode, - std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, - MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns = None) const; - /// Helper function to emit an add or sub instruction. - /// - /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above - /// in a specific order. - /// - /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. - /// - /// \code - /// const std::array<std::array<unsigned, 2>, 4> Table { - /// {{AArch64::ADDXri, AArch64::ADDWri}, - /// {AArch64::ADDXrs, AArch64::ADDWrs}, - /// {AArch64::ADDXrr, AArch64::ADDWrr}, - /// {AArch64::SUBXri, AArch64::SUBWri}, - /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; - /// \endcode - /// - /// Each row in the table corresponds to a different addressing mode. Each - /// column corresponds to a different register size. - /// - /// \attention Rows must be structured as follows: - /// - Row 0: The ri opcode variants - /// - Row 1: The rs opcode variants - /// - Row 2: The rr opcode variants - /// - Row 3: The ri opcode variants for negative immediates - /// - Row 4: The rx opcode variants - /// - /// \attention Columns must be structured as follows: - /// - Column 0: The 64-bit opcode variants - /// - Column 1: The 32-bit opcode variants - /// - /// \p Dst is the destination register of the binop to emit. - /// \p LHS is the left-hand operand of the binop to emit. - /// \p RHS is the right-hand operand of the binop to emit. 
- MachineInstr *emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, - MachineOperand &RHS, + MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; + + /// Emit a floating point comparison between \p LHS and \p RHS. + /// \p Pred if given is the intended predicate to use. + MachineInstr *emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> = None) const; + + MachineInstr *emitInstr(unsigned Opcode, + std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, + MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns = None) const; + /// Helper function to emit an add or sub instruction. + /// + /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above + /// in a specific order. + /// + /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. + /// + /// \code + /// const std::array<std::array<unsigned, 2>, 4> Table { + /// {{AArch64::ADDXri, AArch64::ADDWri}, + /// {AArch64::ADDXrs, AArch64::ADDWrs}, + /// {AArch64::ADDXrr, AArch64::ADDWrr}, + /// {AArch64::SUBXri, AArch64::SUBWri}, + /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; + /// \endcode + /// + /// Each row in the table corresponds to a different addressing mode. Each + /// column corresponds to a different register size. + /// + /// \attention Rows must be structured as follows: + /// - Row 0: The ri opcode variants + /// - Row 1: The rs opcode variants + /// - Row 2: The rr opcode variants + /// - Row 3: The ri opcode variants for negative immediates + /// - Row 4: The rx opcode variants + /// + /// \attention Columns must be structured as follows: + /// - Column 0: The 64-bit opcode variants + /// - Column 1: The 32-bit opcode variants + /// + /// \p Dst is the destination register of the binop to emit. + /// \p LHS is the left-hand operand of the binop to emit. + /// \p RHS is the right-hand operand of the binop to emit. 
+ MachineInstr *emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, + MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, - AArch64CC::CondCode CC, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, + AArch64CC::CondCode CC, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, @@ -250,25 +250,25 @@ private: MachineInstr *emitFMovForFConstant(MachineInstr &MI, MachineRegisterInfo &MRI) const; - /// Emit a CSet for an integer compare. - /// - /// \p DefReg is expected to be a 32-bit scalar register. + /// Emit a CSet for an integer compare. + /// + /// \p DefReg is expected to be a 32-bit scalar register. MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const; - /// Emit a CSet for a FP compare. - /// - /// \p Dst is expected to be a 32-bit scalar register. - MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, - MachineIRBuilder &MIRBuilder) const; - - /// Emit the overflow op for \p Opcode. - /// - /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, - /// G_USUBO, etc. - std::pair<MachineInstr *, AArch64CC::CondCode> - emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, - MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - + /// Emit a CSet for a FP compare. + /// + /// \p Dst is expected to be a 32-bit scalar register. + MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, + MachineIRBuilder &MIRBuilder) const; + + /// Emit the overflow op for \p Opcode. + /// + /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, + /// G_USUBO, etc. + std::pair<MachineInstr *, AArch64CC::CondCode> + emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// \p IsNegative is true if the test should be "not zero". /// This will also optimize the test bit instruction when possible. @@ -276,11 +276,11 @@ private: MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - /// Emit a CB(N)Z instruction which branches to \p DestMBB. - MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const; - + /// Emit a CB(N)Z instruction which branches to \p DestMBB. 
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const; + // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. // We use these manually instead of using the importer since it doesn't // support SDNodeXForm. @@ -577,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); if (!ValAndVReg) return None; - Immed = ValAndVReg->Value.getSExtValue(); + Immed = ValAndVReg->Value.getSExtValue(); } else return None; return Immed; @@ -865,7 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, #ifndef NDEBUG ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); assert(ValidCopy && "Invalid copy."); - (void)KnownValid; + (void)KnownValid; #endif return ValidCopy; }; @@ -1012,173 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { return GenericOpc; } -MachineInstr * -AArch64InstructionSelector::emitSelect(Register Dst, Register True, - Register False, AArch64CC::CondCode CC, - MachineIRBuilder &MIB) const { - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(False, MRI, TRI)->getID() == - RBI.getRegBank(True, MRI, TRI)->getID() && - "Expected both select operands to have the same regbank?"); - LLT Ty = MRI.getType(True); - if (Ty.isVector()) - return nullptr; - const unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && - "Expected 32 bit or 64 bit select only?"); - const bool Is32Bit = Size == 32; - if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { - unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; - auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); - return &*FCSel; - } - - // By default, we'll try and emit a CSEL. - unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; - bool Optimized = false; - auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, - &Optimized](Register &Reg, Register &OtherReg, - bool Invert) { - if (Optimized) - return false; - - // Attempt to fold: - // - // %sub = G_SUB 0, %x - // %select = G_SELECT cc, %reg, %sub - // - // Into: - // %select = CSNEG %reg, %x, cc - Register MatchReg; - if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %xor = G_XOR %x, -1 - // %select = G_SELECT cc, %reg, %xor - // - // Into: - // %select = CSINV %reg, %x, cc - if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %add = G_ADD %x, 1 - // %select = G_SELECT cc, %reg, %add - // - // Into: - // %select = CSINC %reg, %x, cc - if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { - Opc = Is32Bit ? 
AArch64::CSINCWr : AArch64::CSINCXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - +MachineInstr * +AArch64InstructionSelector::emitSelect(Register Dst, Register True, + Register False, AArch64CC::CondCode CC, + MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(False, MRI, TRI)->getID() == + RBI.getRegBank(True, MRI, TRI)->getID() && + "Expected both select operands to have the same regbank?"); + LLT Ty = MRI.getType(True); + if (Ty.isVector()) + return nullptr; + const unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "Expected 32 bit or 64 bit select only?"); + const bool Is32Bit = Size == 32; + if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { + unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; + auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); + return &*FCSel; + } + + // By default, we'll try and emit a CSEL. + unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; + bool Optimized = false; + auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, + &Optimized](Register &Reg, Register &OtherReg, + bool Invert) { + if (Optimized) + return false; + + // Attempt to fold: + // + // %sub = G_SUB 0, %x + // %select = G_SELECT cc, %reg, %sub + // + // Into: + // %select = CSNEG %reg, %x, cc + Register MatchReg; + if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %xor = G_XOR %x, -1 + // %select = G_SELECT cc, %reg, %xor + // + // Into: + // %select = CSINV %reg, %x, cc + if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %add = G_ADD %x, 1 + // %select = G_SELECT cc, %reg, %add + // + // Into: + // %select = CSINC %reg, %x, cc + if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + return false; - }; - - // Helper lambda which tries to use CSINC/CSINV for the instruction when its - // true/false values are constants. - // FIXME: All of these patterns already exist in tablegen. We should be - // able to import these. - auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, - &Optimized]() { - if (Optimized) - return false; - auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); - auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); - if (!TrueCst && !FalseCst) - return false; - - Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; - if (TrueCst && FalseCst) { - int64_t T = TrueCst->Value.getSExtValue(); - int64_t F = FalseCst->Value.getSExtValue(); - - if (T == 0 && F == 1) { - // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = ZReg; - False = ZReg; - return true; - } - - if (T == 0 && F == -1) { - // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc - Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; - True = ZReg; - False = ZReg; - return true; - } - } - - if (TrueCst) { - int64_t T = TrueCst->Value.getSExtValue(); - if (T == 1) { - // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - - if (T == -1) { - // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - } - - if (FalseCst) { - int64_t F = FalseCst->Value.getSExtValue(); - if (F == 1) { - // G_SELECT cc, t, 1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - False = ZReg; - return true; - } - - if (F == -1) { - // G_SELECT cc, t, -1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - False = ZReg; - return true; - } - } - return false; - }; - - Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); - Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); - Optimized |= TryOptSelectCst(); - auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); - return &*SelectInst; + }; + + // Helper lambda which tries to use CSINC/CSINV for the instruction when its + // true/false values are constants. + // FIXME: All of these patterns already exist in tablegen. We should be + // able to import these. + auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, + &Optimized]() { + if (Optimized) + return false; + auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); + auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); + if (!TrueCst && !FalseCst) + return false; + + Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; + if (TrueCst && FalseCst) { + int64_t T = TrueCst->Value.getSExtValue(); + int64_t F = FalseCst->Value.getSExtValue(); + + if (T == 0 && F == 1) { + // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = ZReg; + False = ZReg; + return true; + } + + if (T == 0 && F == -1) { + // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = ZReg; + False = ZReg; + return true; + } + } + + if (TrueCst) { + int64_t T = TrueCst->Value.getSExtValue(); + if (T == 1) { + // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + + if (T == -1) { + // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + } + + if (FalseCst) { + int64_t F = FalseCst->Value.getSExtValue(); + if (F == 1) { + // G_SELECT cc, t, 1 -> CSINC t, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + False = ZReg; + return true; + } + + if (F == -1) { + // G_SELECT cc, t, -1 -> CSINC t, zreg, cc + Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; + False = ZReg; + return true; + } + } + return false; + }; + + Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); + Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); + Optimized |= TryOptSelectCst(); + auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); + return &*SelectInst; } static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { @@ -1308,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); } if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } case TargetOpcode::G_ASHR: @@ -1318,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, auto VRegAndVal = getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } } @@ -1420,9 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit( } bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( - MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, - MachineIRBuilder &MIB) const { - assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); + MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, + MachineIRBuilder &MIB) const { + assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); // Given something like this: // // %x = ...Something... @@ -1444,92 +1444,92 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( // Check if the AND has a constant on its RHS which we can use as a mask. // If it's a power of 2, then it's the same as checking a specific bit. // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) - auto MaybeBit = getConstantVRegValWithLookThrough( - AndInst.getOperand(2).getReg(), *MIB.getMRI()); - if (!MaybeBit) + auto MaybeBit = getConstantVRegValWithLookThrough( + AndInst.getOperand(2).getReg(), *MIB.getMRI()); + if (!MaybeBit) return false; - int32_t Bit = MaybeBit->Value.exactLogBase2(); - if (Bit < 0) - return false; - - Register TestReg = AndInst.getOperand(1).getReg(); + int32_t Bit = MaybeBit->Value.exactLogBase2(); + if (Bit < 0) + return false; + Register TestReg = AndInst.getOperand(1).getReg(); + // Emit a TB(N)Z. 
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); return true; } -MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, - bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const { - assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == - AArch64::GPRRegBankID && - "Expected GPRs only?"); - auto Ty = MRI.getType(CompareReg); - unsigned Width = Ty.getSizeInBits(); - assert(!Ty.isVector() && "Expected scalar only?"); - assert(Width <= 64 && "Expected width to be at most 64?"); - static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, - {AArch64::CBNZW, AArch64::CBNZX}}; - unsigned Opc = OpcTable[IsNegative][Width == 64]; - auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); - constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); - return &*BranchMI; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( - MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { - assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't - // totally clean. Some of them require two branches to implement. - auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); - emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, - Pred); - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); +MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, + bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const { + assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == + AArch64::GPRRegBankID && + "Expected GPRs only?"); + auto Ty = MRI.getType(CompareReg); + unsigned Width = Ty.getSizeInBits(); + assert(!Ty.isVector() && "Expected scalar only?"); + assert(Width <= 64 && "Expected width to be at most 64?"); + static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, + {AArch64::CBNZW, AArch64::CBNZX}}; + unsigned Opc = OpcTable[IsNegative][Width == 64]; + auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); + constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); + return &*BranchMI; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( + MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { + assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two branches to implement. 
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); + emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, + Pred); + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); - if (CC2 != AArch64CC::AL) - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. - // - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (!ProduceNonFlagSettingCondBr) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. + // + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (!ProduceNonFlagSettingCondBr) return false; - MachineRegisterInfo &MRI = *MIB.getMRI(); - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto Pred = - static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); - Register LHS = ICmp.getOperand(2).getReg(); - Register RHS = ICmp.getOperand(3).getReg(); - - // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. + MachineRegisterInfo &MRI = *MIB.getMRI(); + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto Pred = + static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); + Register LHS = ICmp.getOperand(2).getReg(); + Register RHS = ICmp.getOperand(3).getReg(); + + // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); // When we can emit a TB(N)Z, prefer that. // // Handle non-commutative condition codes first. // Note that we don't want to do this when we have a G_AND because it can // become a tst. The tst will make the test bit in the TB(N)Z redundant. - if (VRegAndVal && !AndInst) { - int64_t C = VRegAndVal->Value.getSExtValue(); + if (VRegAndVal && !AndInst) { + int64_t C = VRegAndVal->Value.getSExtValue(); // When we have a greater-than comparison, we can just test if the msb is // zero. @@ -1550,97 +1550,97 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( } } - // Attempt to handle commutative condition codes. Right now, that's only - // eq/ne. 
- if (ICmpInst::isEquality(Pred)) { - if (!VRegAndVal) { - std::swap(RHS, LHS); - VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); - } - - if (VRegAndVal && VRegAndVal->Value == 0) { - // If there's a G_AND feeding into this branch, try to fold it away by - // emitting a TB(N)Z instead. - // - // Note: If we have LT, then it *is* possible to fold, but it wouldn't be - // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding - // would be redundant. - if (AndInst && - tryOptAndIntoCompareBranch( - *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { - I.eraseFromParent(); - return true; - } - - // Otherwise, try to emit a CB(N)Z instead. - auto LHSTy = MRI.getType(LHS); - if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { - emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); - I.eraseFromParent(); - return true; - } - } - } - - return false; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) + // Attempt to handle commutative condition codes. Right now, that's only + // eq/ne. + if (ICmpInst::isEquality(Pred)) { + if (!VRegAndVal) { + std::swap(RHS, LHS); + VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); + AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + } + + if (VRegAndVal && VRegAndVal->Value == 0) { + // If there's a G_AND feeding into this branch, try to fold it away by + // emitting a TB(N)Z instead. + // + // Note: If we have LT, then it *is* possible to fold, but it wouldn't be + // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding + // would be redundant. + if (AndInst && + tryOptAndIntoCompareBranch( + *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { + I.eraseFromParent(); + return true; + } + + // Otherwise, try to emit a CB(N)Z instead. + auto LHSTy = MRI.getType(LHS); + if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { + emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); + I.eraseFromParent(); + return true; + } + } + } + + return false; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) return true; - - // Couldn't optimize. Emit a compare + a Bcc. - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto PredOp = ICmp.getOperand(1); - emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( - static_cast<CmpInst::Predicate>(PredOp.getPredicate())); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::selectCompareBranch( - MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { - Register CondReg = I.getOperand(0).getReg(); - MachineInstr *CCMI = MRI.getVRegDef(CondReg); - if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { - CondReg = CCMI->getOperand(1).getReg(); - CCMI = MRI.getVRegDef(CondReg); - } - - // Try to select the G_BRCOND using whatever is feeding the condition if - // possible. 
- MachineIRBuilder MIB(I); - unsigned CCMIOpc = CCMI->getOpcode(); - if (CCMIOpc == TargetOpcode::G_FCMP) - return selectCompareBranchFedByFCmp(I, *CCMI, MIB); - if (CCMIOpc == TargetOpcode::G_ICMP) - return selectCompareBranchFedByICmp(I, *CCMI, MIB); - - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (ProduceNonFlagSettingCondBr) { - emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, - I.getOperand(1).getMBB(), MIB); + + // Couldn't optimize. Emit a compare + a Bcc. + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto PredOp = ICmp.getOperand(1); + emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( + static_cast<CmpInst::Predicate>(PredOp.getPredicate())); + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::selectCompareBranch( + MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { + Register CondReg = I.getOperand(0).getReg(); + MachineInstr *CCMI = MRI.getVRegDef(CondReg); + if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { + CondReg = CCMI->getOperand(1).getReg(); + CCMI = MRI.getVRegDef(CondReg); + } + + // Try to select the G_BRCOND using whatever is feeding the condition if + // possible. + MachineIRBuilder MIB(I); + unsigned CCMIOpc = CCMI->getOpcode(); + if (CCMIOpc == TargetOpcode::G_FCMP) + return selectCompareBranchFedByFCmp(I, *CCMI, MIB); + if (CCMIOpc == TargetOpcode::G_ICMP) + return selectCompareBranchFedByICmp(I, *CCMI, MIB); + + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (ProduceNonFlagSettingCondBr) { + emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, + I.getOperand(1).getMBB(), MIB); I.eraseFromParent(); return true; } - // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. - auto TstMI = - MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - auto Bcc = MIB.buildInstr(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addMBB(I.getOperand(1).getMBB()); + // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. + auto TstMI = + MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + auto Bcc = MIB.buildInstr(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addMBB(I.getOperand(1).getMBB()); I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); } /// Returns the element immediate value of a vector shift operand if found. @@ -1661,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg, return None; if (Idx == 1) - ImmVal = VRegAndVal->Value.getSExtValue(); - if (ImmVal != VRegAndVal->Value.getSExtValue()) + ImmVal = VRegAndVal->Value.getSExtValue(); + if (ImmVal != VRegAndVal->Value.getSExtValue()) return None; } @@ -1725,14 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; } else if (Ty == LLT::vector(2, 32)) { Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = ImmVal ? 
AArch64::SHLv4i16_shift : AArch64::USHLv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1749,10 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL( return true; } -bool AArch64InstructionSelector::selectVectorAshrLshr( +bool AArch64InstructionSelector::selectVectorAshrLshr( MachineInstr &I, MachineRegisterInfo &MRI) const { - assert(I.getOpcode() == TargetOpcode::G_ASHR || - I.getOpcode() == TargetOpcode::G_LSHR); + assert(I.getOpcode() == TargetOpcode::G_ASHR || + I.getOpcode() == TargetOpcode::G_LSHR); Register DstReg = I.getOperand(0).getReg(); const LLT Ty = MRI.getType(DstReg); Register Src1Reg = I.getOperand(1).getReg(); @@ -1761,40 +1761,40 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( if (!Ty.isVector()) return false; - bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; - - // We expect the immediate case to be lowered in the PostLegalCombiner to - // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. - + bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; + + // We expect the immediate case to be lowered in the PostLegalCombiner to + // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. + // There is not a shift right register instruction, but the shift left // register instruction takes a signed value, where negative numbers specify a // right shift. unsigned Opc = 0; unsigned NegOpc = 0; - const TargetRegisterClass *RC = - getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); + const TargetRegisterClass *RC = + getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); if (Ty == LLT::vector(2, 64)) { - Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; + Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; NegOpc = AArch64::NEGv2i64; } else if (Ty == LLT::vector(4, 32)) { - Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; + Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; NegOpc = AArch64::NEGv4i32; } else if (Ty == LLT::vector(2, 32)) { - Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; + Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; NegOpc = AArch64::NEGv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; - NegOpc = AArch64::NEGv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; - NegOpc = AArch64::NEGv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; - NegOpc = AArch64::NEGv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = IsASHR ? 
AArch64::SSHLv4i16 : AArch64::USHLv4i16; + NegOpc = AArch64::NEGv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; + NegOpc = AArch64::NEGv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; + NegOpc = AArch64::NEGv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; + NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; @@ -1931,40 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { MRI.setType(DstReg, LLT::scalar(64)); return true; } - case AArch64::G_DUP: { - // Convert the type from p0 to s64 to help selection. - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (!DstTy.getElementType().isPointer()) - return false; - MachineIRBuilder MIB(I); - auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); - MRI.setType(I.getOperand(0).getReg(), - DstTy.changeElementType(LLT::scalar(64))); - MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); - I.getOperand(1).setReg(NewSrc.getReg(0)); - return true; - } - case TargetOpcode::G_UITOFP: - case TargetOpcode::G_SITOFP: { - // If both source and destination regbanks are FPR, then convert the opcode - // to G_SITOF so that the importer can select it to an fpr variant. - // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank - // copy. - Register SrcReg = I.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) - return false; - - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { - if (I.getOpcode() == TargetOpcode::G_SITOFP) - I.setDesc(TII.get(AArch64::G_SITOF)); - else - I.setDesc(TII.get(AArch64::G_UITOF)); - return true; - } - return false; - } + case AArch64::G_DUP: { + // Convert the type from p0 to s64 to help selection. + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (!DstTy.getElementType().isPointer()) + return false; + MachineIRBuilder MIB(I); + auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); + MRI.setType(I.getOperand(0).getReg(), + DstTy.changeElementType(LLT::scalar(64))); + MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); + I.getOperand(1).setReg(NewSrc.getReg(0)); + return true; + } + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_SITOFP: { + // If both source and destination regbanks are FPR, then convert the opcode + // to G_SITOF so that the importer can select it to an fpr variant. + // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank + // copy. + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) + return false; + + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { + if (I.getOpcode() == TargetOpcode::G_SITOFP) + I.setDesc(TII.get(AArch64::G_SITOF)); + else + I.setDesc(TII.get(AArch64::G_UITOF)); + return true; + } + return false; + } default: return false; } @@ -2005,14 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd( LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); return false; } - - // Also take the opportunity here to try to do some optimization. 
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. - Register NegatedReg; - if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) - return true; - I.getOperand(2).setReg(NegatedReg); - I.setDesc(TII.get(TargetOpcode::G_SUB)); + + // Also take the opportunity here to try to do some optimization. + // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. + Register NegatedReg; + if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) + return true; + I.getOperand(2).setReg(NegatedReg); + I.setDesc(TII.get(TargetOpcode::G_SUB)); return true; } @@ -2102,17 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { - case TargetOpcode::G_BR: { - // If the branch jumps to the fallthrough block, don't bother emitting it. - // Only do this for -O0 for a good code size improvement, because when - // optimizations are enabled we want to leave this choice to - // MachineBlockPlacement. - bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; - if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) - return false; - I.eraseFromParent(); - return true; - } + case TargetOpcode::G_BR: { + // If the branch jumps to the fallthrough block, don't bother emitting it. + // Only do this for -O0 for a good code size improvement, because when + // optimizations are enabled we want to leave this choice to + // MachineBlockPlacement. + bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; + if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) + return false; + I.eraseFromParent(); + return true; + } case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { @@ -2232,8 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { MachineIRBuilder MIB(I); switch (Opcode) { - case TargetOpcode::G_BRCOND: - return selectCompareBranch(I, MF, MRI); + case TargetOpcode::G_BRCOND: + return selectCompareBranch(I, MF, MRI); case TargetOpcode::G_BRINDIRECT: { I.setDesc(TII.get(AArch64::BR)); @@ -2313,7 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - const LLT s128 = LLT::scalar(128); + const LLT s128 = LLT::scalar(128); const LLT p0 = LLT::pointer(0, 64); const Register DefReg = I.getOperand(0).getReg(); @@ -2323,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // FIXME: Redundant check, but even less readable when factored out. if (isFP) { - if (Ty != s32 && Ty != s64 && Ty != s128) { + if (Ty != s32 && Ty != s64 && Ty != s128) { LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 - << " or " << s128 << '\n'); + << " or " << s128 << '\n'); return false; } @@ -2339,9 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // The case when we have 0.0 is covered by tablegen. Reject it here so we // can be sure tablegen works correctly and isn't rescued by this code. - // 0.0 is not covered by tablegen for FP128. So we will handle this - // scenario in the code here. - if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) + // 0.0 is not covered by tablegen for FP128. So we will handle this + // scenario in the code here. 
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) return false; } else { // s32 and s64 are covered by tablegen. @@ -2368,17 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // Either emit a FMOV, or emit a copy to emit a normal mov. const TargetRegisterClass &GPRRC = DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; - const TargetRegisterClass &FPRRC = - DefSize == 32 ? AArch64::FPR32RegClass - : (DefSize == 64 ? AArch64::FPR64RegClass - : AArch64::FPR128RegClass); + const TargetRegisterClass &FPRRC = + DefSize == 32 ? AArch64::FPR32RegClass + : (DefSize == 64 ? AArch64::FPR64RegClass + : AArch64::FPR128RegClass); // Can we use a FMOV instruction to represent the immediate? if (emitFMovForFConstant(I, MRI)) return true; // For 64b values, emit a constant pool load instead. - if (DefSize == 64 || DefSize == 128) { + if (DefSize == 64 || DefSize == 128) { auto *FPImm = I.getOperand(1).getFPImm(); MachineIRBuilder MIB(I); auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); @@ -2571,21 +2571,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } auto &MemOp = **I.memoperands_begin(); - uint64_t MemSizeInBytes = MemOp.getSize(); + uint64_t MemSizeInBytes = MemOp.getSize(); if (MemOp.isAtomic()) { // For now we just support s8 acquire loads to be able to compile stack // protector code. if (MemOp.getOrdering() == AtomicOrdering::Acquire && - MemSizeInBytes == 1) { + MemSizeInBytes == 1) { I.setDesc(TII.get(AArch64::LDARB)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } - unsigned MemSizeInBits = MemSizeInBytes * 8; + unsigned MemSizeInBits = MemSizeInBytes * 8; -#ifndef NDEBUG +#ifndef NDEBUG const Register PtrReg = I.getOperand(1).getReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. @@ -2598,78 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const Register ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); - // Helper lambda for partially selecting I. Either returns the original - // instruction with an updated opcode, or a new instruction. - auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { - bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; - const unsigned NewOpc = - selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); - if (NewOpc == I.getOpcode()) - return nullptr; - // Check if we can fold anything into the addressing mode. - auto AddrModeFns = - selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); - if (!AddrModeFns) { - // Can't fold anything. Use the original instruction. - I.setDesc(TII.get(NewOpc)); - I.addOperand(MachineOperand::CreateImm(0)); - return &I; + // Helper lambda for partially selecting I. Either returns the original + // instruction with an updated opcode, or a new instruction. + auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { + bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; + const unsigned NewOpc = + selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); + if (NewOpc == I.getOpcode()) + return nullptr; + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = + selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + if (!AddrModeFns) { + // Can't fold anything. Use the original instruction. 
+ I.setDesc(TII.get(NewOpc)); + I.addOperand(MachineOperand::CreateImm(0)); + return &I; } - // Folded something. Create a new instruction and return it. - auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); - IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); - NewInst.cloneMemRefs(I); - for (auto &Fn : *AddrModeFns) - Fn(NewInst); - I.eraseFromParent(); - return &*NewInst; - }; + // Folded something. Create a new instruction and return it. + auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); + IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); + NewInst.cloneMemRefs(I); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + I.eraseFromParent(); + return &*NewInst; + }; - MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); - if (!LoadStore) - return false; + MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); + if (!LoadStore) + return false; // If we're storing a 0, use WZR/XZR. - if (Opcode == TargetOpcode::G_STORE) { - auto CVal = getConstantVRegValWithLookThrough( - LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, - /*HandleFConstants = */ false); - if (CVal && CVal->Value == 0) { - switch (LoadStore->getOpcode()) { - case AArch64::STRWui: - case AArch64::STRHHui: - case AArch64::STRBBui: - LoadStore->getOperand(0).setReg(AArch64::WZR); - break; - case AArch64::STRXui: - LoadStore->getOperand(0).setReg(AArch64::XZR); - break; - } + if (Opcode == TargetOpcode::G_STORE) { + auto CVal = getConstantVRegValWithLookThrough( + LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, + /*HandleFConstants = */ false); + if (CVal && CVal->Value == 0) { + switch (LoadStore->getOpcode()) { + case AArch64::STRWui: + case AArch64::STRHHui: + case AArch64::STRBBui: + LoadStore->getOperand(0).setReg(AArch64::WZR); + break; + case AArch64::STRXui: + LoadStore->getOperand(0).setReg(AArch64::XZR); + break; + } } } if (IsZExtLoad) { - // The zextload from a smaller type to i32 should be handled by the - // importer. - if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) + // The zextload from a smaller type to i32 should be handled by the + // importer. + if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; // If we have a ZEXTLOAD then change the load's type to be a narrower reg - // and zero_extend with SUBREG_TO_REG. + // and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - Register DstReg = LoadStore->getOperand(0).getReg(); - LoadStore->getOperand(0).setReg(LdReg); + Register DstReg = LoadStore->getOperand(0).getReg(); + LoadStore->getOperand(0).setReg(LdReg); - MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); + MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) .addImm(0) .addUse(LdReg) .addImm(AArch64::sub_32); - constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, MRI); } - return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); } case TargetOpcode::G_SMULH: @@ -2700,21 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // operands to use appropriate classes. 
return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - case TargetOpcode::G_LSHR: + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: if (MRI.getType(I.getOperand(0).getReg()).isVector()) - return selectVectorAshrLshr(I, MRI); + return selectVectorAshrLshr(I, MRI); LLVM_FALLTHROUGH; case TargetOpcode::G_SHL: if (Opcode == TargetOpcode::G_SHL && MRI.getType(I.getOperand(0).getReg()).isVector()) return selectVectorSHL(I, MRI); LLVM_FALLTHROUGH; - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_OR: { + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_OR: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; @@ -2743,24 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { I.eraseFromParent(); return true; } - case TargetOpcode::G_SADDO: - case TargetOpcode::G_UADDO: - case TargetOpcode::G_SSUBO: - case TargetOpcode::G_USUBO: { - // Emit the operation and get the correct condition code. + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_USUBO: { + // Emit the operation and get the correct condition code. MachineIRBuilder MIRBuilder(I); - auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), - I.getOperand(2), I.getOperand(3), MIRBuilder); + auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), + I.getOperand(2), I.getOperand(3), MIRBuilder); // Now, put the overflow result in the register given by the first operand - // to the overflow op. CSINC increments the result when the predicate is - // false, so to get the increment when it's true, we need to use the - // inverse. In this case, we want to increment when carry is set. - Register ZReg = AArch64::WZR; + // to the overflow op. CSINC increments the result when the predicate is + // false, so to get the increment when it's true, we need to use the + // inverse. In this case, we want to increment when carry is set. + Register ZReg = AArch64::WZR; auto CsetMI = MIRBuilder .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {ZReg, ZReg}) - .addImm(getInvertedCondCode(OpAndCC.second)); + {ZReg, ZReg}) + .addImm(getInvertedCondCode(OpAndCC.second)); constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); I.eraseFromParent(); return true; @@ -2768,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_PTRMASK: { Register MaskReg = I.getOperand(2).getReg(); - Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); + Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); // TODO: Implement arbitrary cases if (!MaskVal || !isShiftedMask_64(*MaskVal)) return false; @@ -3059,15 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (tryOptSelect(I)) return true; - // Make sure to use an unused vreg instead of wzr, so that the peephole - // optimizations will be able to optimize these. 
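An aside on the G_PTRMASK case in the hunk above: the selector only proceeds when the mask constant is a single contiguous run of set bits and otherwise returns false, per the TODO. A minimal standalone sketch of that test follows; the helpers are illustrative stand-ins for llvm::isShiftedMask_64 / llvm::isMask_64, not the LLVM functions themselves.

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the shifted-mask test that gates the
// G_PTRMASK case above: accept only values whose set bits form one
// contiguous run, possibly shifted left (e.g. 0x0ff0 but not 0x0f0f).
static bool isMask64(uint64_t V) {        // 0...01...1
  return V && ((V + 1) & V) == 0;
}
static bool isShiftedMask64(uint64_t V) { // 0...01...10...0
  return V && isMask64((V - 1) | V);
}

int main() {
  assert(isShiftedMask64(0x0ff0));                 // one shifted run of ones
  assert(isShiftedMask64(0xff00000000000000ULL));  // run at the top of the word
  assert(!isShiftedMask64(0x0f0f));                // two separate runs
  assert(!isShiftedMask64(0));                     // zero is rejected
  return 0;
}

Masks that fail this test are simply left unselected here, as the TODO in the hunk notes.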
- MachineIRBuilder MIB(I); - Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) - .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) - return false; + // Make sure to use an unused vreg instead of wzr, so that the peephole + // optimizations will be able to optimize these. + MachineIRBuilder MIB(I); + Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) + return false; I.eraseFromParent(); return true; } @@ -3082,21 +3082,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } MachineIRBuilder MIRBuilder(I); - auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIRBuilder); + auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), + MIRBuilder); emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); I.eraseFromParent(); return true; } case TargetOpcode::G_FCMP: { - MachineIRBuilder MIRBuilder(I); - CmpInst::Predicate Pred = - static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), - MIRBuilder, Pred) || - !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) + MachineIRBuilder MIRBuilder(I); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), + MIRBuilder, Pred) || + !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) return false; I.eraseFromParent(); return true; @@ -3136,24 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); } } - case AArch64::G_DUP: { - // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by - // imported patterns. Do it manually here. Avoiding generating s16 gpr is - // difficult because at RBS we may end up pessimizing the fpr case if we - // decided to add an anyextend to fix this. Manual selection is the most - // robust solution for now. - Register SrcReg = I.getOperand(1).getReg(); - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) - return false; // We expect the fpr regbank case to be imported. - LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.getSizeInBits() == 16) - I.setDesc(TII.get(AArch64::DUPv8i16gpr)); - else if (SrcTy.getSizeInBits() == 8) - I.setDesc(TII.get(AArch64::DUPv16i8gpr)); - else - return false; - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } + case AArch64::G_DUP: { + // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by + // imported patterns. Do it manually here. Avoiding generating s16 gpr is + // difficult because at RBS we may end up pessimizing the fpr case if we + // decided to add an anyextend to fix this. Manual selection is the most + // robust solution for now. 
+ Register SrcReg = I.getOperand(1).getReg(); + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) + return false; // We expect the fpr regbank case to be imported. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.getSizeInBits() == 16) + I.setDesc(TII.get(AArch64::DUPv8i16gpr)); + else if (SrcTy.getSizeInBits() == 8) + I.setDesc(TII.get(AArch64::DUPv16i8gpr)); + else + return false; + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } case TargetOpcode::G_INTRINSIC_TRUNC: return selectIntrinsicTrunc(I, MRI); case TargetOpcode::G_INTRINSIC_ROUND: @@ -3174,52 +3174,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectConcatVectors(I, MRI); case TargetOpcode::G_JUMP_TABLE: return selectJumpTable(I, MRI); - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_ADD: - return selectReduction(I, MRI); - } - - return false; -} - -bool AArch64InstructionSelector::selectReduction( - MachineInstr &I, MachineRegisterInfo &MRI) const { - Register VecReg = I.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(16, 8)) - Opc = AArch64::ADDVv16i8v; - else if (VecTy == LLT::vector(8, 16)) - Opc = AArch64::ADDVv8i16v; - else if (VecTy == LLT::vector(4, 32)) - Opc = AArch64::ADDVv4i32v; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::ADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_ADD: + return selectReduction(I, MRI); } - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(2, 32)) - Opc = AArch64::FADDPv2i32p; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::FADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } return false; } +bool AArch64InstructionSelector::selectReduction( + MachineInstr &I, MachineRegisterInfo &MRI) const { + Register VecReg = I.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(16, 8)) + Opc = AArch64::ADDVv16i8v; + else if (VecTy == LLT::vector(8, 16)) + Opc = AArch64::ADDVv8i16v; + else if (VecTy == LLT::vector(4, 32)) + Opc = AArch64::ADDVv4i32v; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::ADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(2, 32)) + Opc = AArch64::FADDPv2i32p; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::FADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + return false; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -3230,8 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, Register TargetReg = 
MRI.createVirtualRegister(&AArch64::GPR64RegClass); Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - - MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); + + MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg}, {JTAddr, Index}) .addJumpTableIndex(JTI); @@ -3268,20 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue( const GlobalValue &GV = *I.getOperand(1).getGlobal(); MachineIRBuilder MIB(I); - auto LoadGOT = - MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) - .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); + auto LoadGOT = + MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) + .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, - {LoadGOT.getReg(0)}) + {LoadGOT.getReg(0)}) .addImm(0); - MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); + MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) - .addUse(AArch64::X0, RegState::Implicit) + .addUse(AArch64::X0, RegState::Implicit) .addDef(AArch64::X0, RegState::Implicit) .addRegMask(TRI.getTLSCallPreservedMask()); @@ -3767,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt( (void)WideTy; assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && "source register size too small!"); - assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); + assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); // Need the lane index to determine the correct copy opcode. 
MachineOperand &LaneIdxOp = I.getOperand(2); @@ -3782,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt( auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); MachineIRBuilder MIRBuilder(I); @@ -4005,10 +4005,10 @@ static std::pair<unsigned, unsigned> getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { unsigned Opc, SubregIdx; if (RB.getID() == AArch64::GPRRegBankID) { - if (EltSize == 16) { - Opc = AArch64::INSvi16gpr; - SubregIdx = AArch64::ssub; - } else if (EltSize == 32) { + if (EltSize == 16) { + Opc = AArch64::INSvi16gpr; + SubregIdx = AArch64::ssub; + } else if (EltSize == 32) { Opc = AArch64::INSvi32gpr; SubregIdx = AArch64::ssub; } else if (EltSize == 64) { @@ -4037,93 +4037,93 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { return std::make_pair(Opc, SubregIdx); } -MachineInstr *AArch64InstructionSelector::emitInstr( - unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns) const { - assert(Opcode && "Expected an opcode?"); - assert(!isPreISelGenericOpcode(Opcode) && - "Function should only be used to produce selected instructions!"); - auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); - if (RenderFns) - for (auto &Fn : *RenderFns) - Fn(MI); - constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); - return &*MI; -} - -MachineInstr *AArch64InstructionSelector::emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); - auto Ty = MRI.getType(LHS.getReg()); - assert(!Ty.isVector() && "Expected a scalar or pointer?"); - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); - bool Is32Bit = Size == 32; - - // INSTRri form with positive arithmetic immediate. - if (auto Fns = selectArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRri form with negative arithmetic immediate. - if (auto Fns = selectNegArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrx form. - if (auto Fns = selectArithExtendedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrs form. 
- if (auto Fns = selectShiftedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, - MIRBuilder); -} - +MachineInstr *AArch64InstructionSelector::emitInstr( + unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns) const { + assert(Opcode && "Expected an opcode?"); + assert(!isPreISelGenericOpcode(Opcode) && + "Function should only be used to produce selected instructions!"); + auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); + if (RenderFns) + for (auto &Fn : *RenderFns) + Fn(MI); + constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + return &*MI; +} + +MachineInstr *AArch64InstructionSelector::emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + auto Ty = MRI.getType(LHS.getReg()); + assert(!Ty.isVector() && "Expected a scalar or pointer?"); + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); + bool Is32Bit = Size == 32; + + // INSTRri form with positive arithmetic immediate. + if (auto Fns = selectArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRri form with negative arithmetic immediate. + if (auto Fns = selectNegArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrx form. + if (auto Fns = selectArithExtendedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrs form. 
+ if (auto Fns = selectShiftedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, + MIRBuilder); +} + MachineInstr * AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDXri, AArch64::ADDWri}, - {AArch64::ADDXrs, AArch64::ADDWrs}, - {AArch64::ADDXrr, AArch64::ADDWrr}, - {AArch64::SUBXri, AArch64::SUBWri}, - {AArch64::ADDXrx, AArch64::ADDWrx}}}; - return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::ADDSXrs, AArch64::ADDSWrs}, - {AArch64::ADDSXrr, AArch64::ADDSWrr}, - {AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::SUBSXrs, AArch64::SUBSWrs}, - {AArch64::SUBSXrr, AArch64::SUBSWrr}, - {AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDXri, AArch64::ADDWri}, + {AArch64::ADDXrs, AArch64::ADDWrs}, + {AArch64::ADDXrr, AArch64::ADDWrr}, + {AArch64::SUBXri, AArch64::SUBWri}, + {AArch64::ADDXrx, AArch64::ADDWrx}}}; + return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::ADDSXrs, AArch64::ADDSWrs}, + {AArch64::ADDSXrr, AArch64::ADDSWrr}, + {AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::SUBSXrs, AArch64::SUBSWrs}, + {AArch64::SUBSXrr, AArch64::SUBSWrr}, + {AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); } MachineInstr * @@ -4131,129 +4131,129 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); - auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; - return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); + auto RC = Is32Bit ? 
&AArch64::GPR32RegClass : &AArch64::GPR64RegClass; + return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); } MachineInstr * -AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, +AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT Ty = MRI.getType(LHS.getReg()); - unsigned RegSize = Ty.getSizeInBits(); + LLT Ty = MRI.getType(LHS.getReg()); + unsigned RegSize = Ty.getSizeInBits(); bool Is32Bit = (RegSize == 32); - const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, - {AArch64::ANDSXrs, AArch64::ANDSWrs}, - {AArch64::ANDSXrr, AArch64::ANDSWrr}}; - // ANDS needs a logical immediate for its immediate form. Check if we can - // fold one in. - if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { - int64_t Imm = ValAndVReg->Value.getSExtValue(); - - if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { - auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); - TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - return &*TstMI; - } - } - - if (auto Fns = selectLogicalShiftedRegister(RHS)) - return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); - return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); + const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, + {AArch64::ANDSXrs, AArch64::ANDSWrs}, + {AArch64::ANDSXrr, AArch64::ANDSWrr}}; + // ANDS needs a logical immediate for its immediate form. Check if we can + // fold one in. + if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { + int64_t Imm = ValAndVReg->Value.getSExtValue(); + + if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { + auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); + TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + return &*TstMI; + } + } + + if (auto Fns = selectLogicalShiftedRegister(RHS)) + return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); + return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); } -MachineInstr *AArch64InstructionSelector::emitIntegerCompare( +MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); assert(Predicate.isPredicate() && "Expected predicate?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT CmpTy = MRI.getType(LHS.getReg()); - assert(!CmpTy.isVector() && "Expected scalar or pointer"); - unsigned Size = CmpTy.getSizeInBits(); - (void)Size; - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); - // Fold the compare into a cmn or tst if possible. 
- if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) - return FoldCmp; - auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); - return emitSUBS(Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( - Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); -#ifndef NDEBUG - LLT Ty = MRI.getType(Dst); - assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && - "Expected a 32-bit scalar register?"); -#endif - const Register ZeroReg = AArch64::WZR; - auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { - auto CSet = - MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) - .addImm(getInvertedCondCode(CC)); - constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); - return &*CSet; - }; - - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(Pred, CC1, CC2); - if (CC2 == AArch64CC::AL) - return EmitCSet(Dst, CC1); - - const TargetRegisterClass *RC = &AArch64::GPR32RegClass; - Register Def1Reg = MRI.createVirtualRegister(RC); - Register Def2Reg = MRI.createVirtualRegister(RC); - EmitCSet(Def1Reg, CC1); - EmitCSet(Def2Reg, CC2); - auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); - constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); - return &*OrMI; -} - -MachineInstr * -AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> Pred) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - LLT Ty = MRI.getType(LHS); - if (Ty.isVector()) - return nullptr; - unsigned OpSize = Ty.getSizeInBits(); - if (OpSize != 32 && OpSize != 64) - return nullptr; - - // If this is a compare against +0.0, then we don't have - // to explicitly materialize a constant. - const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); - bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); - - auto IsEqualityPred = [](CmpInst::Predicate P) { - return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || - P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; - }; - if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { - // Try commutating the operands. - const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); - if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { - ShouldUseImm = true; - std::swap(LHS, RHS); - } - } - unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, - {AArch64::FCMPSri, AArch64::FCMPDri}}; - unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; - - // Partially build the compare. Decide if we need to add a use for the - // third operand based off whether or not we're comparing against 0.0. - auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); - if (!ShouldUseImm) - CmpMI.addUse(RHS); + LLT CmpTy = MRI.getType(LHS.getReg()); + assert(!CmpTy.isVector() && "Expected scalar or pointer"); + unsigned Size = CmpTy.getSizeInBits(); + (void)Size; + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); + // Fold the compare into a cmn or tst if possible. 
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) + return FoldCmp; + auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); + return emitSUBS(Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( + Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); +#ifndef NDEBUG + LLT Ty = MRI.getType(Dst); + assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && + "Expected a 32-bit scalar register?"); +#endif + const Register ZeroReg = AArch64::WZR; + auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { + auto CSet = + MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) + .addImm(getInvertedCondCode(CC)); + constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); + return &*CSet; + }; + + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(Pred, CC1, CC2); + if (CC2 == AArch64CC::AL) + return EmitCSet(Dst, CC1); + + const TargetRegisterClass *RC = &AArch64::GPR32RegClass; + Register Def1Reg = MRI.createVirtualRegister(RC); + Register Def2Reg = MRI.createVirtualRegister(RC); + EmitCSet(Def1Reg, CC1); + EmitCSet(Def2Reg, CC2); + auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); + constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); + return &*OrMI; +} + +MachineInstr * +AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> Pred) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + LLT Ty = MRI.getType(LHS); + if (Ty.isVector()) + return nullptr; + unsigned OpSize = Ty.getSizeInBits(); + if (OpSize != 32 && OpSize != 64) + return nullptr; + + // If this is a compare against +0.0, then we don't have + // to explicitly materialize a constant. + const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); + bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); + + auto IsEqualityPred = [](CmpInst::Predicate P) { + return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || + P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; + }; + if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { + // Try commutating the operands. + const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); + if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { + ShouldUseImm = true; + std::swap(LHS, RHS); + } + } + unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, + {AArch64::FCMPSri, AArch64::FCMPDri}}; + unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; + + // Partially build the compare. Decide if we need to add a use for the + // third operand based off whether or not we're comparing against 0.0. 
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); + if (!ShouldUseImm) + CmpMI.addUse(RHS); constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); - return &*CmpMI; + return &*CmpMI; } MachineInstr *AArch64InstructionSelector::emitVectorConcat( @@ -4363,25 +4363,25 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, return &*I; } -std::pair<MachineInstr *, AArch64CC::CondCode> -AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, - MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode!"); - case TargetOpcode::G_SADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_UADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); - case TargetOpcode::G_SSUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_USUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); - } -} - +std::pair<MachineInstr *, AArch64CC::CondCode> +AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, + MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_UADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); + case TargetOpcode::G_SSUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_USUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); + } +} + bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { MachineIRBuilder MIB(I); MachineRegisterInfo &MRI = *MIB.getMRI(); @@ -4441,17 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { AArch64CC::CondCode CondCode; if (CondOpc == TargetOpcode::G_ICMP) { - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); CondCode = changeICMPPredToAArch64CC(Pred); - emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), - CondDef->getOperand(1), MIB); + emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), + CondDef->getOperand(1), MIB); } else { // Get the condition code for the select. - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); AArch64CC::CondCode CondCode2; - changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); + changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two // instructions to emit the comparison. @@ -4460,16 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { if (CondCode2 != AArch64CC::AL) return false; - if (!emitFPCompare(CondDef->getOperand(2).getReg(), - CondDef->getOperand(3).getReg(), MIB)) { - LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); + if (!emitFPCompare(CondDef->getOperand(2).getReg(), + CondDef->getOperand(3).getReg(), MIB)) { + LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; - } + } } // Emit the select. 
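A side note on emitOverflowOp and the CSINC-based cset in the hunks above: with both sources tied to WZR, CSINC yields (cond ? 0 : 1), so materializing "1 when cc holds" uses the inverted condition code, and AArch64 condition codes invert by flipping their low bit. The sketch below restates that relationship with enum values mirroring the architectural encoding; it is illustrative only, not the selector's own code.

#include <cassert>

// Illustrative restatement of the cset-via-CSINC trick used for the
// G_[SU]ADDO / G_[SU]SUBO cases above: CSINC Wd, WZR, WZR, cond gives
// (cond ? 0 : 1), so "1 when cc holds" is emitted with the *inverted*
// code, and inversion is a flip of the encoding's low bit.
enum CondCode { EQ = 0, NE = 1, HS = 2, LO = 3, MI = 4, PL = 5,
                VS = 6, VC = 7, HI = 8, LS = 9, GE = 10, LT = 11,
                GT = 12, LE = 13, AL = 14 };

constexpr CondCode invert(CondCode CC) { return CondCode(CC ^ 0x1); }

int main() {
  // Codes picked in emitOverflowOp: signed overflow -> VS, unsigned add
  // overflow (carry set) -> HS, unsigned sub borrow (carry clear) -> LO.
  assert(invert(VS) == VC);
  assert(invert(HS) == LO);
  assert(invert(GE) == LT);
  return 0;
}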
- emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), - I.getOperand(3).getReg(), CondCode, MIB); + emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), + I.getOperand(3).getReg(), CondCode, MIB); I.eraseFromParent(); return true; } @@ -4552,15 +4552,15 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( // Produce this if the compare is signed: // // tst x, y - if (!CmpInst::isUnsigned(P) && LHSDef && + if (!CmpInst::isUnsigned(P) && LHSDef && LHSDef->getOpcode() == TargetOpcode::G_AND) { // Make sure that the RHS is 0. auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); if (!ValAndVReg || ValAndVReg->Value != 0) return nullptr; - return emitTST(LHSDef->getOperand(1), - LHSDef->getOperand(2), MIRBuilder); + return emitTST(LHSDef->getOperand(1), + LHSDef->getOperand(2), MIRBuilder); } return nullptr; @@ -4708,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt( auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); // Perform the lane insert. Register SrcReg = I.getOperand(1).getReg(); @@ -4765,9 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt( bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - unsigned DstSize = DstTy.getSizeInBits(); - assert(DstSize <= 128 && "Unexpected build_vec type!"); - if (DstSize < 32) + unsigned DstSize = DstTy.getSizeInBits(); + assert(DstSize <= 128 && "Unexpected build_vec type!"); + if (DstSize < 32) return false; // Check if we're building a constant vector, in which case we want to // generate a constant pool load instead of a vector insert sequence. @@ -4788,24 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec( } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); - if (CV->isNullValue()) { - // Until the importer can support immAllZerosV in pattern leaf nodes, - // select a zero move manually here. - Register DstReg = I.getOperand(0).getReg(); - if (DstSize == 128) { - auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); - } else if (DstSize == 64) { - auto Mov = - MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) - .addImm(0); - MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) - .addReg(Mov.getReg(0), 0, AArch64::dsub); - I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); - } - } + if (CV->isNullValue()) { + // Until the importer can support immAllZerosV in pattern leaf nodes, + // select a zero move manually here. 
+ Register DstReg = I.getOperand(0).getReg(); + if (DstSize == 128) { + auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + } else if (DstSize == 64) { + auto Mov = + MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); + } + } auto *CPLoad = emitLoadFromConstantPool(CV, MIB); if (!CPLoad) { LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); @@ -4927,10 +4927,10 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( case Intrinsic::debugtrap: MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); break; - case Intrinsic::ubsantrap: - MIRBuilder.buildInstr(AArch64::BRK, {}, {}) - .addImm(I.getOperand(1).getImm() | ('U' << 8)); - break; + case Intrinsic::ubsantrap: + MIRBuilder.buildInstr(AArch64::BRK, {}, {}) + .addImm(I.getOperand(1).getImm() | ('U' << 8)); + break; } I.eraseFromParent(); @@ -4996,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { - if (!MFReturnAddr) { - // Insert the copy from LR/X30 into the entry block, before it can be - // clobbered by anything. - MFI.setReturnAddressIsTaken(true); - MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, - AArch64::GPR64RegClass); - } - - if (STI.hasPAuth()) { - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); - } else { - MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + if (!MFReturnAddr) { + // Insert the copy from LR/X30 into the entry block, before it can be + // clobbered by anything. 
+ MFI.setReturnAddressIsTaken(true); + MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, + AArch64::GPR64RegClass); } - + + if (STI.hasPAuth()) { + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); + } else { + MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } + I.eraseFromParent(); return true; } @@ -5031,16 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, MIRBuilder.buildCopy({DstReg}, {FrameAddr}); else { MFI.setReturnAddressIsTaken(true); - - if (STI.hasPAuth()) { - Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); - } else { - MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); - } + + if (STI.hasPAuth()) { + Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); + } else { + MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } } I.eraseFromParent(); @@ -5248,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL( // The value must fit into 3 bits, and must be positive. Make sure that is // true. - int64_t ImmVal = ValAndVReg->Value.getSExtValue(); + int64_t ImmVal = ValAndVReg->Value.getSExtValue(); // Since we're going to pull this into a shift, the constant value must be // a power of 2. If we got a multiply, then we need to check this. @@ -5388,60 +5388,60 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const { MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); - if (!Root.isReg()) - return None; - MachineInstr *PtrAdd = - getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); - if (!PtrAdd) + if (!Root.isReg()) return None; - - // Check for an immediates which cannot be encoded in the [base + imm] - // addressing mode, and can't be encoded in an add/sub. If this happens, we'll - // end up with code like: - // - // mov x0, wide - // add x1 base, x0 - // ldr x2, [x1, x0] - // - // In this situation, we can use the [base, xreg] addressing mode to save an - // add/sub: - // - // mov x0, wide - // ldr x2, [base, x0] - auto ValAndVReg = - getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); - if (ValAndVReg) { - unsigned Scale = Log2_32(SizeInBytes); - int64_t ImmOff = ValAndVReg->Value.getSExtValue(); - - // Skip immediates that can be selected in the load/store addresing - // mode. - if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && - ImmOff < (0x1000 << Scale)) - return None; - - // Helper lambda to decide whether or not it is preferable to emit an add. - auto isPreferredADD = [](int64_t ImmOff) { - // Constants in [0x0, 0xfff] can be encoded in an add. - if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) - return true; - - // Can it be encoded in an add lsl #12? - if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) - return false; - - // It can be encoded in an add lsl #12, but we may not want to. 
If it is - // possible to select this as a single movz, then prefer that. A single - // movz is faster than an add with a shift. - return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && - (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; - }; - - // If the immediate can be encoded in a single add/sub, then bail out. - if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) - return None; - } - + MachineInstr *PtrAdd = + getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); + if (!PtrAdd) + return None; + + // Check for an immediates which cannot be encoded in the [base + imm] + // addressing mode, and can't be encoded in an add/sub. If this happens, we'll + // end up with code like: + // + // mov x0, wide + // add x1 base, x0 + // ldr x2, [x1, x0] + // + // In this situation, we can use the [base, xreg] addressing mode to save an + // add/sub: + // + // mov x0, wide + // ldr x2, [base, x0] + auto ValAndVReg = + getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); + if (ValAndVReg) { + unsigned Scale = Log2_32(SizeInBytes); + int64_t ImmOff = ValAndVReg->Value.getSExtValue(); + + // Skip immediates that can be selected in the load/store addresing + // mode. + if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && + ImmOff < (0x1000 << Scale)) + return None; + + // Helper lambda to decide whether or not it is preferable to emit an add. + auto isPreferredADD = [](int64_t ImmOff) { + // Constants in [0x0, 0xfff] can be encoded in an add. + if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) + return true; + + // Can it be encoded in an add lsl #12? + if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) + return false; + + // It can be encoded in an add lsl #12, but we may not want to. If it is + // possible to select this as a single movz, then prefer that. A single + // movz is faster than an add with a shift. + return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && + (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; + }; + + // If the immediate can be encoded in a single add/sub, then bail out. + if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) + return None; + } + // Try to fold shifts into the addressing mode. 
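To make the isPreferredADD heuristic above easier to check in isolation, here is a standalone restatement with a few sample offsets; it mirrors the lambda quoted in the hunk rather than calling into the selector.

#include <cassert>
#include <cstdint>

// Standalone restatement of the isPreferredADD lambda above: prefer
// materializing the offset with add/sub when it fits a 12-bit immediate
// (optionally shifted left by 12), unless the constant is also a single
// movz, in which case the [base, Xm] register form is kept instead.
static bool isPreferredADD(int64_t ImmOff) {
  // Fits a plain 12-bit add immediate.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Not representable as "add ..., #imm, lsl #12" at all.
  if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
    return false;
  // Representable with lsl #12, but only preferred when the constant is
  // not already a single movz (with or without a 16-bit shift).
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
}

int main() {
  assert(isPreferredADD(0x123));      // plain add immediate
  assert(!isPreferredADD(0x5000));    // a single movz covers this
  assert(!isPreferredADD(0x240000));  // movz #0x24, lsl #16 covers this
  assert(isPreferredADD(0x123000));   // needs movz+movk otherwise; add lsl #12 wins
  return 0;
}

As in the hunk, the caller bails out of the [base, Xm] addressing mode when either ImmOff or -ImmOff passes this test.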
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); if (AddrModeFns) @@ -5871,8 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); - Optional<int64_t> CstVal = - getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); + Optional<int64_t> CstVal = + getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5a6c904e3f..af24267bf2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -14,7 +14,7 @@ #include "AArch64LegalizerInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" @@ -23,8 +23,8 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include <initializer_list> -#include "llvm/Support/MathExtras.h" +#include <initializer_list> +#include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "aarch64-legalinfo" @@ -56,13 +56,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); - std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ - v16s8, v8s16, v4s32, - v2s64, v2p0, - /* End 128bit types */ - /* Begin 64bit types */ - v8s8, v4s16, v2s32}; - + std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ + v16s8, v8s16, v4s32, + v2s64, v2p0, + /* End 128bit types */ + /* Begin 64bit types */ + v8s8, v4s16, v2s32}; + const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); // FIXME: support subtargets which have neon/fp-armv8 disabled. @@ -71,31 +71,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return; } - // Some instructions only support s16 if the subtarget has full 16-bit FP - // support. - const bool HasFP16 = ST.hasFullFP16(); - const LLT &MinFPScalar = HasFP16 ? s16 : s32; - + // Some instructions only support s16 if the subtarget has full 16-bit FP + // support. + const bool HasFP16 = ST.hasFullFP16(); + const LLT &MinFPScalar = HasFP16 ? 
s16 : s32; + getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalFor({p0, s1, s8, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) - .clampScalar(0, s1, s64) - .widenScalarToNextPow2(0, 8) - .fewerElementsIf( - [=](const LegalityQuery &Query) { - return Query.Types[0].isVector() && - (Query.Types[0].getElementType() != s64 || - Query.Types[0].getNumElements() != 2); - }, - [=](const LegalityQuery &Query) { - LLT EltTy = Query.Types[0].getElementType(); - if (EltTy == s64) - return std::make_pair(0, LLT::vector(2, 64)); - return std::make_pair(0, EltTy); - }); - - getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) + .legalFor({p0, s1, s8, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) + .clampScalar(0, s1, s64) + .widenScalarToNextPow2(0, 8) + .fewerElementsIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].isVector() && + (Query.Types[0].getElementType() != s64 || + Query.Types[0].getNumElements() != 2); + }, + [=](const LegalityQuery &Query) { + LLT EltTy = Query.Types[0].getElementType(); + if (EltTy == s64) + return std::make_pair(0, LLT::vector(2, 64)); + return std::make_pair(0, EltTy); + }); + + getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) .clampScalar(0, s16, s64) .widenScalarToNextPow2(0); @@ -105,38 +105,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) - .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) - .scalarizeIf( - [=](const LegalityQuery &Query) { - return Query.Opcode == G_MUL && Query.Types[0] == v2s64; - }, - 0) - .legalFor({v2s64}) + .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) + .scalarizeIf( + [=](const LegalityQuery &Query) { + return Query.Opcode == G_MUL && Query.Types[0] == v2s64; + }, + 0) + .legalFor({v2s64}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) .clampNumElements(0, v2s32, v4s32) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) + getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) .customIf([=](const LegalityQuery &Query) { const auto &SrcTy = Query.Types[0]; const auto &AmtTy = Query.Types[1]; return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && AmtTy.getSizeInBits() == 32; }) - .legalFor({ - {s32, s32}, - {s32, s64}, - {s64, s64}, - {v8s8, v8s8}, - {v16s8, v16s8}, - {v4s16, v4s16}, - {v8s16, v8s16}, - {v2s32, v2s32}, - {v4s32, v4s32}, - {v2s64, v2s64}, - }) + .legalFor({ + {s32, s32}, + {s32, s64}, + {s64, s64}, + {v8s8, v8s8}, + {v16s8, v16s8}, + {v4s16, v4s16}, + {v8s16, v8s16}, + {v2s32, v2s32}, + {v4s32, v4s32}, + {v2s64, v2s64}, + }) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) @@ -161,25 +161,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SREM, G_UREM}) .lowerFor({s1, s8, s16, s32, s64}); - getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); + getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); - getActionDefinitionsBuilder( - {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) + getActionDefinitionsBuilder( + {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) .legalFor({{s32, s1}, {s64, s1}}) .minScalar(0, s32); getActionDefinitionsBuilder({G_FADD, G_FSUB, 
G_FMUL, G_FDIV, G_FNEG}) - .legalFor({s32, s64, v2s64, v4s32, v2s32}) - .clampNumElements(0, v2s32, v4s32) - .clampNumElements(0, v2s64, v2s64); + .legalFor({s32, s64, v2s64, v4s32, v2s32}) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64); getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, - G_FNEARBYINT, G_INTRINSIC_LRINT}) + G_FNEARBYINT, G_INTRINSIC_LRINT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( @@ -285,7 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {v4s32, p0, 128, 8}, {v2s64, p0, 128, 8}}) // These extends are also legal - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) + .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) .clampScalar(0, s8, s64) .lowerIfMemSizeNotPow2() // Lower any any-extending loads left into G_ANYEXT and G_LOAD @@ -307,7 +307,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {p0, p0, 64, 8}, {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, - {v8s8, p0, 64, 8}, + {v8s8, p0, 64, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, {v2s32, p0, 64, 8}, @@ -325,19 +325,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // Constants getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({p0, s8, s16, s32, s64}) + .legalFor({p0, s8, s16, s32, s64}) .clampScalar(0, s8, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder(G_FCONSTANT) - .legalIf([=](const LegalityQuery &Query) { - const auto &Ty = Query.Types[0]; - if (HasFP16 && Ty == s16) - return true; - return Ty == s32 || Ty == s64 || Ty == s128; - }) - .clampScalar(0, MinFPScalar, s128); - - getActionDefinitionsBuilder({G_ICMP, G_FCMP}) + .legalIf([=](const LegalityQuery &Query) { + const auto &Ty = Query.Types[0]; + if (HasFP16 && Ty == s16) + return true; + return Ty == s32 || Ty == s64 || Ty == s128; + }) + .clampScalar(0, MinFPScalar, s128); + + getActionDefinitionsBuilder({G_ICMP, G_FCMP}) .legalFor({{s32, s32}, {s32, s64}, {s32, p0}, @@ -365,8 +365,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarOrEltIf( [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, s64) - .widenScalarOrEltToNextPow2(1) - .clampNumElements(0, v2s32, v4s32); + .widenScalarOrEltToNextPow2(1) + .clampNumElements(0, v2s32, v4s32); // Extensions auto ExtLegalFunc = [=](const LegalityQuery &Query) { @@ -374,7 +374,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) if (DstSize == 128 && !Query.Types[0].isVector()) return false; // Extending to a scalar s128 needs narrowing. - + // Make sure that we have something that will fit in a register, and // make sure it's a power of 2. if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) @@ -399,28 +399,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf(ExtLegalFunc) .clampScalar(0, s64, s64); // Just for s128, others are handled above. 
- getActionDefinitionsBuilder(G_TRUNC) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, - 0, s8) - .customIf([=](const LegalityQuery &Query) { - LLT DstTy = Query.Types[0]; - LLT SrcTy = Query.Types[1]; - return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; - }) - .alwaysLegal(); + getActionDefinitionsBuilder(G_TRUNC) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, + 0, s8) + .customIf([=](const LegalityQuery &Query) { + LLT DstTy = Query.Types[0]; + LLT SrcTy = Query.Types[1]; + return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; + }) + .alwaysLegal(); - getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); + getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); // FP conversions - getActionDefinitionsBuilder(G_FPTRUNC) - .legalFor( - {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) - .clampMaxNumElements(0, s32, 2); - getActionDefinitionsBuilder(G_FPEXT) - .legalFor( - {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) - .clampMaxNumElements(0, s64, 2); + getActionDefinitionsBuilder(G_FPTRUNC) + .legalFor( + {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) + .clampMaxNumElements(0, s32, 2); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor( + {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) + .clampMaxNumElements(0, s64, 2); // Conversions getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) @@ -433,7 +433,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) .clampScalar(1, s32, s64) - .minScalarSameAs(1, 0) + .minScalarSameAs(1, 0) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0); @@ -445,8 +445,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) - .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) - .lowerIf(isVector(0)); + .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) + .lowerIf(isVector(0)); // Pointer-handling getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); @@ -576,8 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; }) // Any vectors left are the wrong size. Scalarize them. - .scalarize(0) - .scalarize(1); + .scalarize(0) + .scalarize(1); } getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) @@ -589,40 +589,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf([=](const LegalityQuery &Query) { const LLT &VecTy = Query.Types[1]; return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || - VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || - VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; - }) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - // We want to promote to <M x s1> to <M x s64> if that wouldn't - // cause the total vec size to be > 128b. 
- return Query.Types[1].getNumElements() <= 2; - }, - 0, s64) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 4; - }, - 0, s32) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 8; - }, - 0, s16) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 16; - }, - 0, s8) - .minScalarOrElt(0, s8); // Worst case, we need at least s8. + VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || + VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; + }) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + // We want to promote to <M x s1> to <M x s64> if that wouldn't + // cause the total vec size to be > 128b. + return Query.Types[1].getNumElements() <= 2; + }, + 0, s64) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 4; + }, + 0, s32) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 8; + }, + 0, s16) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 16; + }, + 0, s8) + .minScalarOrElt(0, s8); // Worst case, we need at least s8. getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) - .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); + .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalFor({{v8s8, s8}, - {v16s8, s8}, - {v4s16, s16}, + .legalFor({{v8s8, s8}, + {v16s8, s8}, + {v4s16, s16}, {v8s16, s16}, {v2s32, s32}, {v4s32, s32}, @@ -638,9 +638,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .minScalarSameAs(1, 0); - getActionDefinitionsBuilder(G_CTLZ) - .legalForCartesianProduct( - {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) + getActionDefinitionsBuilder(G_CTLZ) + .legalForCartesianProduct( + {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) .scalarize(1); getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) @@ -651,7 +651,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // to be the same size as the dest. if (DstTy != SrcTy) return false; - for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { + for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { if (DstTy == Ty) return true; } @@ -668,7 +668,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); - getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); + getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { return Query.Types[0] == p0 && Query.Types[1] == s64; @@ -676,20 +676,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - - getActionDefinitionsBuilder(G_ABS).lowerIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); - - getActionDefinitionsBuilder(G_VECREDUCE_FADD) - // We only have FADDP to do reduction-like operations. Lower the rest. 
- .legalFor({{s32, v2s32}, {s64, v2s64}}) - .lower(); - - getActionDefinitionsBuilder(G_VECREDUCE_ADD) - .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) - .lower(); - + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); + + getActionDefinitionsBuilder(G_ABS).lowerIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); + + getActionDefinitionsBuilder(G_VECREDUCE_FADD) + // We only have FADDP to do reduction-like operations. Lower the rest. + .legalFor({{s32, v2s32}, {s64, v2s64}}) + .lower(); + + getActionDefinitionsBuilder(G_VECREDUCE_ADD) + .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) + .lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -714,63 +714,63 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); case TargetOpcode::G_GLOBAL_VALUE: return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer); - case TargetOpcode::G_TRUNC: - return legalizeVectorTrunc(MI, Helper); + case TargetOpcode::G_TRUNC: + return legalizeVectorTrunc(MI, Helper); } llvm_unreachable("expected switch to return"); } -static void extractParts(Register Reg, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, - SmallVectorImpl<Register> &VRegs) { - for (int I = 0; I < NumParts; ++I) - VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); - MIRBuilder.buildUnmerge(VRegs, Reg); -} - -bool AArch64LegalizerInfo::legalizeVectorTrunc( - MachineInstr &MI, LegalizerHelper &Helper) const { - MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - // Similar to how operand splitting is done in SelectiondDAG, we can handle - // %res(v8s8) = G_TRUNC %in(v8s32) by generating: - // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) - // %lo16(<4 x s16>) = G_TRUNC %inlo - // %hi16(<4 x s16>) = G_TRUNC %inhi - // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 - // %res(<8 x s8>) = G_TRUNC %in16 - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - assert(isPowerOf2_32(DstTy.getSizeInBits()) && - isPowerOf2_32(SrcTy.getSizeInBits())); - - // Split input type. - LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); - // First, split the source into two smaller vectors. - SmallVector<Register, 2> SplitSrcs; - extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); - - // Truncate the splits into intermediate narrower elements. 
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); - for (unsigned I = 0; I < SplitSrcs.size(); ++I) - SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); - - auto Concat = MIRBuilder.buildConcatVectors( - DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); - - Helper.Observer.changingInstr(MI); - MI.getOperand(1).setReg(Concat.getReg(0)); - Helper.Observer.changedInstr(MI); - return true; -} - -bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, - GISelChangeObserver &Observer) const { +static void extractParts(Register Reg, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, + SmallVectorImpl<Register> &VRegs) { + for (int I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + MIRBuilder.buildUnmerge(VRegs, Reg); +} + +bool AArch64LegalizerInfo::legalizeVectorTrunc( + MachineInstr &MI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // Similar to how operand splitting is done in SelectiondDAG, we can handle + // %res(v8s8) = G_TRUNC %in(v8s32) by generating: + // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) + // %lo16(<4 x s16>) = G_TRUNC %inlo + // %hi16(<4 x s16>) = G_TRUNC %inhi + // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 + // %res(<8 x s8>) = G_TRUNC %in16 + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + assert(isPowerOf2_32(DstTy.getSizeInBits()) && + isPowerOf2_32(SrcTy.getSizeInBits())); + + // Split input type. + LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); + // First, split the source into two smaller vectors. + SmallVector<Register, 2> SplitSrcs; + extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); + + // Truncate the splits into intermediate narrower elements. + LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); + for (unsigned I = 0; I < SplitSrcs.size(); ++I) + SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); + + auto Concat = MIRBuilder.buildConcatVectors( + DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); + + Helper.Observer.changingInstr(MI); + MI.getOperand(1).setReg(Concat.getReg(0)); + Helper.Observer.changedInstr(MI); + return true; +} + +bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE); // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP + // G_ADD_LOW instructions. @@ -792,27 +792,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( // Set the regclass on the dest reg too. MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so - // by creating a MOVK that sets bits 48-63 of the register to (global address - // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to - // prevent an incorrect tag being generated during relocation when the the - // global appears before the code section. Without the offset, a global at - // `0x0f00'0000'0000'1000` (i.e. 
at `0x1000` with tag `0xf`) that's referenced - // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = - // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` - // instead of `0xf`. - // This assumes that we're in the small code model so we can assume a binary - // size of <= 4GB, which makes the untagged PC relative offset positive. The - // binary must also be loaded into address range [0, 2^48). Both of these - // properties need to be ensured at runtime when using tagged addresses. - if (OpFlags & AArch64II::MO_TAGGED) { - ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) - .addGlobalAddress(GV, 0x100000000, - AArch64II::MO_PREL | AArch64II::MO_G3) - .addImm(48); - MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - } - + // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so + // by creating a MOVK that sets bits 48-63 of the register to (global address + // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to + // prevent an incorrect tag being generated during relocation when the the + // global appears before the code section. Without the offset, a global at + // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced + // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = + // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` + // instead of `0xf`. + // This assumes that we're in the small code model so we can assume a binary + // size of <= 4GB, which makes the untagged PC relative offset positive. The + // binary must also be loaded into address range [0, 2^48). Both of these + // properties need to be ensured at runtime when using tagged addresses. + if (OpFlags & AArch64II::MO_TAGGED) { + ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) + .addGlobalAddress(GV, 0x100000000, + AArch64II::MO_PREL | AArch64II::MO_G3) + .addImm(48); + MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + } + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); @@ -820,8 +820,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( return true; } -bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI) const { return true; } @@ -838,13 +838,13 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (!VRegAndVal) return true; // Check the shift amount is in range for an immediate form. - int64_t Amount = VRegAndVal->Value.getSExtValue(); + int64_t Amount = VRegAndVal->Value.getSExtValue(); if (Amount > 31) return true; // This will have to remain a register variant. 
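// Illustrative sketch (not from this patch): for a 32-bit shift whose amount is
// a small constant, e.g.
//   %sh:_(s32) = G_ASHR %x:_(s32), %three:_(s32)
// the code below rebuilds the amount as a 64-bit constant,
//   %c:_(s64)  = G_CONSTANT i64 3
//   %sh:_(s32) = G_ASHR %x:_(s32), %c:_(s64)
// so the immediate-form selection patterns can match; amounts above 31 were
// left untouched above and keep the register form.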
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); - Observer.changingInstr(MI); + Observer.changingInstr(MI); MI.getOperand(2).setReg(ExtCst.getReg(0)); - Observer.changedInstr(MI); + Observer.changedInstr(MI); return true; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 8217e37c85..c22cb26608 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" namespace llvm { @@ -46,7 +46,7 @@ private: bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; - bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index fdd04cb77f..bf3190ce93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -1,22 +1,22 @@ -//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// +//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization combines on generic MachineInstrs. -/// -/// The combines here must preserve instruction legality. -/// -/// Lowering combines (e.g. pseudo matching) should be handled by -/// AArch64PostLegalizerLowering. -/// -/// Combines which don't rely on instruction legality should go in the -/// AArch64PreLegalizerCombiner. -/// +/// +/// \file +/// Post-legalization combines on generic MachineInstrs. +/// +/// The combines here must preserve instruction legality. +/// +/// Lowering combines (e.g. pseudo matching) should be handled by +/// AArch64PostLegalizerLowering. +/// +/// Combines which don't rely on instruction legality should go in the +/// AArch64PreLegalizerCombiner. 
+/// //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" @@ -24,12 +24,12 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -37,202 +37,202 @@ using namespace llvm; -/// This combine tries do what performExtractVectorEltCombine does in SDAG. -/// Rewrite for pairwise fadd pattern -/// (s32 (g_extract_vector_elt -/// (g_fadd (vXs32 Other) -/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) -/// -> -/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) -/// (g_extract_vector_elt (vXs32 Other) 1)) -bool matchExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - Register Src1 = MI.getOperand(1).getReg(); - Register Src2 = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); - if (!Cst || Cst->Value != 0) +/// This combine tries do what performExtractVectorEltCombine does in SDAG. +/// Rewrite for pairwise fadd pattern +/// (s32 (g_extract_vector_elt +/// (g_fadd (vXs32 Other) +/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) +/// -> +/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) +/// (g_extract_vector_elt (vXs32 Other) 1)) +bool matchExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); + if (!Cst || Cst->Value != 0) return false; - // SDAG also checks for FullFP16, but this looks to be beneficial anyway. + // SDAG also checks for FullFP16, but this looks to be beneficial anyway. - // Now check for an fadd operation. TODO: expand this for integer add? - auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); - if (!FAddMI) + // Now check for an fadd operation. TODO: expand this for integer add? + auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); + if (!FAddMI) return false; - // If we add support for integer add, must restrict these types to just s64. - unsigned DstSize = DstTy.getSizeInBits(); - if (DstSize != 16 && DstSize != 32 && DstSize != 64) + // If we add support for integer add, must restrict these types to just s64. 
+ unsigned DstSize = DstTy.getSizeInBits(); + if (DstSize != 16 && DstSize != 32 && DstSize != 64) return false; - Register Src1Op1 = FAddMI->getOperand(1).getReg(); - Register Src1Op2 = FAddMI->getOperand(2).getReg(); - MachineInstr *Shuffle = - getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); - MachineInstr *Other = MRI.getVRegDef(Src1Op1); - if (!Shuffle) { - Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); - Other = MRI.getVRegDef(Src1Op2); + Register Src1Op1 = FAddMI->getOperand(1).getReg(); + Register Src1Op2 = FAddMI->getOperand(2).getReg(); + MachineInstr *Shuffle = + getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); + MachineInstr *Other = MRI.getVRegDef(Src1Op1); + if (!Shuffle) { + Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); + Other = MRI.getVRegDef(Src1Op2); } - // We're looking for a shuffle that moves the second element to index 0. - if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && - Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { - std::get<0>(MatchInfo) = TargetOpcode::G_FADD; - std::get<1>(MatchInfo) = DstTy; - std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); + // We're looking for a shuffle that moves the second element to index 0. + if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && + Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { + std::get<0>(MatchInfo) = TargetOpcode::G_FADD; + std::get<1>(MatchInfo) = DstTy; + std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); return true; } return false; } -bool applyExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - unsigned Opc = std::get<0>(MatchInfo); - assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); - // We want to generate two extracts of elements 0 and 1, and add them. - LLT Ty = std::get<1>(MatchInfo); - Register Src = std::get<2>(MatchInfo); - LLT s64 = LLT::scalar(64); - B.setInstrAndDebugLoc(MI); - auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); - auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); - B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); - MI.eraseFromParent(); +bool applyExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + unsigned Opc = std::get<0>(MatchInfo); + assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); + // We want to generate two extracts of elements 0 and 1, and add them. + LLT Ty = std::get<1>(MatchInfo); + Register Src = std::get<2>(MatchInfo); + LLT s64 = LLT::scalar(64); + B.setInstrAndDebugLoc(MI); + auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); + auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); + B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); + MI.eraseFromParent(); return true; } -static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - unsigned Opc = MRI.getVRegDef(R)->getOpcode(); - return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; +static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. 
+ unsigned Opc = MRI.getVRegDef(R)->getOpcode(); + return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; } -static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; +static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. + return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; } -bool matchAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - assert(MI.getOpcode() == TargetOpcode::G_MUL); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - Register Dst = MI.getOperand(0).getReg(); - const LLT Ty = MRI.getType(LHS); - - // The below optimizations require a constant RHS. - auto Const = getConstantVRegValWithLookThrough(RHS, MRI); - if (!Const) +bool matchAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + assert(MI.getOpcode() == TargetOpcode::G_MUL); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(LHS); + + // The below optimizations require a constant RHS. + auto Const = getConstantVRegValWithLookThrough(RHS, MRI); + if (!Const) return false; - const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); - // The following code is ported from AArch64ISelLowering. - // Multiplication of a power of two plus/minus one can be done more - // cheaply as as shift+add/sub. For now, this is true unilaterally. If - // future CPUs have a cheaper MADD instruction, this may need to be - // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and - // 64-bit is 5 cycles, so this is always a win. - // More aggressively, some multiplications N0 * C can be lowered to - // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, - // e.g. 6=3*2=(2+1)*2. - // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 - // which equals to (1+2)*16-(1+2). - // TrailingZeroes is used to test if the mul can be lowered to - // shift+add+shift. - unsigned TrailingZeroes = ConstValue.countTrailingZeros(); - if (TrailingZeroes) { - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into smul or umul. - if (MRI.hasOneNonDBGUse(LHS) && - (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) - return false; - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into madd or msub. - if (MRI.hasOneNonDBGUse(Dst)) { - MachineInstr &UseMI = *MRI.use_instr_begin(Dst); - if (UseMI.getOpcode() == TargetOpcode::G_ADD || - UseMI.getOpcode() == TargetOpcode::G_SUB) - return false; - } - } - // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub - // and shift+add+shift. - APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); - - unsigned ShiftAmt, AddSubOpc; - // Is the shifted value the LHS operand of the add/sub? - bool ShiftValUseIsLHS = true; - // Do we need to negate the result? 
- bool NegateResult = false; - - if (ConstValue.isNonNegative()) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - // (mul x, 2^N - 1) => (sub (shl x, N), x) - // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) - APInt SCVMinus1 = ShiftedConstValue - 1; - APInt CVPlus1 = ConstValue + 1; - if (SCVMinus1.isPowerOf2()) { - ShiftAmt = SCVMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - } else if (CVPlus1.isPowerOf2()) { - ShiftAmt = CVPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - } else - return false; - } else { - // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) - // (mul x, -(2^N + 1)) => - (add (shl x, N), x) - APInt CVNegPlus1 = -ConstValue + 1; - APInt CVNegMinus1 = -ConstValue - 1; - if (CVNegPlus1.isPowerOf2()) { - ShiftAmt = CVNegPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - ShiftValUseIsLHS = false; - } else if (CVNegMinus1.isPowerOf2()) { - ShiftAmt = CVNegMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - NegateResult = true; - } else - return false; - } - - if (NegateResult && TrailingZeroes) + const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); + // The following code is ported from AArch64ISelLowering. + // Multiplication of a power of two plus/minus one can be done more + // cheaply as as shift+add/sub. For now, this is true unilaterally. If + // future CPUs have a cheaper MADD instruction, this may need to be + // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and + // 64-bit is 5 cycles, so this is always a win. + // More aggressively, some multiplications N0 * C can be lowered to + // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, + // e.g. 6=3*2=(2+1)*2. + // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 + // which equals to (1+2)*16-(1+2). + // TrailingZeroes is used to test if the mul can be lowered to + // shift+add+shift. + unsigned TrailingZeroes = ConstValue.countTrailingZeros(); + if (TrailingZeroes) { + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into smul or umul. + if (MRI.hasOneNonDBGUse(LHS) && + (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) + return false; + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into madd or msub. + if (MRI.hasOneNonDBGUse(Dst)) { + MachineInstr &UseMI = *MRI.use_instr_begin(Dst); + if (UseMI.getOpcode() == TargetOpcode::G_ADD || + UseMI.getOpcode() == TargetOpcode::G_SUB) + return false; + } + } + // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub + // and shift+add+shift. + APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); + + unsigned ShiftAmt, AddSubOpc; + // Is the shifted value the LHS operand of the add/sub? + bool ShiftValUseIsLHS = true; + // Do we need to negate the result? 
+ bool NegateResult = false; + + if (ConstValue.isNonNegative()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + // (mul x, 2^N - 1) => (sub (shl x, N), x) + // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) + APInt SCVMinus1 = ShiftedConstValue - 1; + APInt CVPlus1 = ConstValue + 1; + if (SCVMinus1.isPowerOf2()) { + ShiftAmt = SCVMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + } else if (CVPlus1.isPowerOf2()) { + ShiftAmt = CVPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + } else + return false; + } else { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt CVNegPlus1 = -ConstValue + 1; + APInt CVNegMinus1 = -ConstValue - 1; + if (CVNegPlus1.isPowerOf2()) { + ShiftAmt = CVNegPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + ShiftValUseIsLHS = false; + } else if (CVNegMinus1.isPowerOf2()) { + ShiftAmt = CVNegMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + NegateResult = true; + } else + return false; + } + + if (NegateResult && TrailingZeroes) return false; - ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { - auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); - auto ShiftedVal = B.buildShl(Ty, LHS, Shift); - - Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; - Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); - auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); - assert(!(NegateResult && TrailingZeroes) && - "NegateResult and TrailingZeroes cannot both be true for now."); - // Negate the result. - if (NegateResult) { - B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); - return; - } - // Shift the result. - if (TrailingZeroes) { - B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); - return; - } - B.buildCopy(DstReg, Res.getReg(0)); - }; + ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { + auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); + auto ShiftedVal = B.buildShl(Ty, LHS, Shift); + + Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; + Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); + auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); + assert(!(NegateResult && TrailingZeroes) && + "NegateResult and TrailingZeroes cannot both be true for now."); + // Negate the result. + if (NegateResult) { + B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); + return; + } + // Shift the result. 
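// Illustrative worked example (not from this patch): for %d = G_MUL %x, 6 the
// constant is 6 = (2 + 1) * 2, so TrailingZeroes = 1 and ShiftedConstValue = 3.
// SCVMinus1 = 2 is a power of two, giving ShiftAmt = 1 and AddSubOpc = G_ADD,
// with NegateResult left false. The apply lambda therefore emits
//   %t   = G_SHL %x, 1     ; x * 2
//   %sum = G_ADD %t, %x    ; x * 3
//   %d   = G_SHL %sum, 1   ; x * 6
// i.e. ((x << 1) + x) << 1, two shifts and an add in place of the multiply.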
+ if (TrailingZeroes) { + B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); + return; + } + B.buildCopy(DstReg, Res.getReg(0)); + }; return true; } -bool applyAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - B.setInstrAndDebugLoc(MI); - ApplyFn(B, MI.getOperand(0).getReg()); +bool applyAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + B.setInstrAndDebugLoc(MI); + ApplyFn(B, MI.getOperand(0).getReg()); MI.eraseFromParent(); return true; } @@ -348,7 +348,7 @@ INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE, false) namespace llvm { -FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { return new AArch64PostLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index a06ff4b541..0447c3e8a0 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -1,704 +1,704 @@ -//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization lowering for instructions. -/// -/// This is used to offload pattern matching from the selector. -/// -/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually -/// a G_ZIP, G_UZP, etc. -/// -/// General optimization combines should be handled by either the -/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. -/// -//===----------------------------------------------------------------------===// - -#include "AArch64TargetMachine.h" -#include "AArch64GlobalISelUtils.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/GlobalISel/Combiner.h" -#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" -#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-postlegalizer-lowering" - -using namespace llvm; -using namespace MIPatternMatch; -using namespace AArch64GISelUtils; - -/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. -/// -/// Used for matching target-supported shuffles before codegen. -struct ShuffleVectorPseudo { - unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) - Register Dst; ///< Destination register. - SmallVector<SrcOp, 2> SrcOps; ///< Source registers. 
- ShuffleVectorPseudo(unsigned Opc, Register Dst, - std::initializer_list<SrcOp> SrcOps) - : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; - ShuffleVectorPseudo() {} -}; - -/// Check if a vector shuffle corresponds to a REV instruction with the -/// specified blocksize. -static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, - unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); - - unsigned BlockElts = M[0] + 1; - - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSize; - - if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - // Ignore undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != - (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. -/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. -static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || - (M[i + 1] >= 0 && - static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) - return false; - } - return true; -} - -/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector -/// sources of the shuffle are different. -static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, - unsigned NumElts) { - // Look for the first non-undef element. - auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); - if (FirstRealElt == M.end()) - return None; - - // Use APInt to handle overflow when calculating expected element. - unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); - APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); - - // The following shuffle indices must be the successive elements after the - // first real element. - if (any_of( - make_range(std::next(FirstRealElt), M.end()), - [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) - return None; - - // The index of an EXT is the first element if it is not UNDEF. - // Watch out for the beginning UNDEFs. The EXT index should be the expected - // value of the first element. E.g. - // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. - // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. - // ExpectedElt is the last mask index plus 1. - uint64_t Imm = ExpectedElt.getZExtValue(); - bool ReverseExt = false; - - // There are two difference cases requiring to reverse input vectors. - // For example, for vector <4 x i32> we have the following cases, - // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) - // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) - // For both cases, we finally use mask <5, 6, 7, 0>, which requires - // to reverse two input vectors. - if (Imm < NumElts) - ReverseExt = true; - else - Imm -= NumElts; - return std::make_pair(ReverseExt, Imm); -} - -/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. -/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. 
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - // Skip undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) - return false; - } - return true; -} - -/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. -/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. -static bool isZipMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - - // 0 means use ZIP1, 1 means use ZIP2. - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || - (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) - return false; - Idx += 1; - } - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a -/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. -static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(Dst); - unsigned EltSize = Ty.getScalarSizeInBits(); - - // Element size for a rev cannot be 64. - if (EltSize == 64) - return false; - - unsigned NumElts = Ty.getNumElements(); - - // Try to produce G_REV64 - if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { - MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); - return true; - } - - // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. - // This should be identical to above, but with a constant 32 and constant - // 16. - return false; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_TRN1 or G_TRN2 instruction. -static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_UZP1 or G_UZP2 instruction. -/// -/// \param [in] MI - The shuffle vector instruction. -/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. -static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? 
AArch64::G_UZP1 : AArch64::G_UZP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isZipMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - if (Lane != 0) - return false; - - // Try to match a vector splat operation into a dup instruction. - // We're looking for this pattern: - // - // %scalar:gpr(s64) = COPY $x0 - // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF - // %cst0:gpr(s32) = G_CONSTANT i32 0 - // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) - // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) - // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) - // - // ...into: - // %splat = G_DUP %scalar - - // Begin matching the insert. - auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, - MI.getOperand(1).getReg(), MRI); - if (!InsMI) - return false; - // Match the undef vector operand. - if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), - MRI)) - return false; - - // Match the index constant 0. - if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) - return false; - - MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), - {InsMI->getOperand(2).getReg()}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(Lane >= 0 && "Expected positive lane?"); - // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the - // lane's definition directly. - auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, - MI.getOperand(1).getReg(), MRI); - if (!BuildVecMI) - return false; - Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); - MatchInfo = - ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); - return true; -} - -static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - auto MaybeLane = getSplatIndex(MI); - if (!MaybeLane) - return false; - int Lane = *MaybeLane; - // If this is undef splat, generate it via "just" vdup, if possible. 
- if (Lane < 0) - Lane = 0; - if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) - return true; - if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) - return true; - return false; -} - -static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Dst = MI.getOperand(0).getReg(); - auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), - MRI.getType(Dst).getNumElements()); - if (!ExtInfo) - return false; - bool ReverseExt; - uint64_t Imm; - std::tie(ReverseExt, Imm) = *ExtInfo; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - if (ReverseExt) - std::swap(V1, V2); - uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; - Imm *= ExtFactor; - MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. -/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. -static bool applyShuffleVectorPseudo(MachineInstr &MI, - ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); - MI.eraseFromParent(); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. -/// Special-cased because the constant operand must be emitted as a G_CONSTANT -/// for the imported tablegen patterns to work. -static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - // Tablegen patterns expect an i32 G_CONSTANT as the final op. - auto Cst = - MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, - {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); - MI.eraseFromParent(); - return true; -} - -/// isVShiftRImm - Check if this is a valid vector for the immediate -/// operand of a vector shift right operation. The value must be in the range: -/// 1 <= Value <= ElementBits for a right shift. -static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, - int64_t &Cnt) { - assert(Ty.isVector() && "vector shift count is not a vector type"); - MachineInstr *MI = MRI.getVRegDef(Reg); - auto Cst = getBuildVectorConstantSplat(*MI, MRI); - if (!Cst) - return false; - Cnt = *Cst; - int64_t ElementBits = Ty.getScalarSizeInBits(); - return Cnt >= 1 && Cnt <= ElementBits; -} - -/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. -static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR || - MI.getOpcode() == TargetOpcode::G_LSHR); - LLT Ty = MRI.getType(MI.getOperand(1).getReg()); - if (!Ty.isVector()) - return false; - return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); -} - -static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - unsigned Opc = MI.getOpcode(); - assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); - unsigned NewOpc = - Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; - MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); - MI.eraseFromParent(); - return true; -} - -/// Determine if it is possible to modify the \p RHS and predicate \p P of a -/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. 
-/// -/// \returns A pair containing the updated immediate and predicate which may -/// be used to optimize the instruction. -/// -/// \note This assumes that the comparison has been legalized. -Optional<std::pair<uint64_t, CmpInst::Predicate>> -tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, - const MachineRegisterInfo &MRI) { - const auto &Ty = MRI.getType(RHS); - if (Ty.isVector()) - return None; - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); - - // If the RHS is not a constant, or the RHS is already a valid arithmetic - // immediate, then there is nothing to change. - auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); - if (!ValAndVReg) - return None; - uint64_t C = ValAndVReg->Value.getZExtValue(); - if (isLegalArithImmed(C)) - return None; - - // We have a non-arithmetic immediate. Check if adjusting the immediate and - // adjusting the predicate will result in a legal arithmetic immediate. - switch (P) { - default: - return None; - case CmpInst::ICMP_SLT: - case CmpInst::ICMP_SGE: - // Check for - // - // x slt c => x sle c - 1 - // x sge c => x sgt c - 1 - // - // When c is not the smallest possible negative number. - if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || - (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) - return None; - P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; - C -= 1; - break; - case CmpInst::ICMP_ULT: - case CmpInst::ICMP_UGE: - // Check for - // - // x ult c => x ule c - 1 - // x uge c => x ugt c - 1 - // - // When c is not zero. - if (C == 0) - return None; - P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; - C -= 1; - break; - case CmpInst::ICMP_SLE: - case CmpInst::ICMP_SGT: - // Check for - // - // x sle c => x slt c + 1 - // x sgt c => s sge c + 1 - // - // When c is not the largest possible signed integer. - if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || - (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) - return None; - P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; - C += 1; - break; - case CmpInst::ICMP_ULE: - case CmpInst::ICMP_UGT: - // Check for - // - // x ule c => x ult c + 1 - // x ugt c => s uge c + 1 - // - // When c is not the largest possible unsigned integer. - if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || - (Size == 64 && C == UINT64_MAX)) - return None; - P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; - C += 1; - break; - } - - // Check if the new constant is valid, and return the updated constant and - // predicate if it is. - if (Size == 32) - C = static_cast<uint32_t>(C); - if (!isLegalArithImmed(C)) - return None; - return {{C, P}}; -} - -/// Determine whether or not it is possible to update the RHS and predicate of -/// a G_ICMP instruction such that the RHS will be selected as an arithmetic -/// immediate. -/// -/// \p MI - The G_ICMP instruction -/// \p MatchInfo - The new RHS immediate and predicate on success -/// -/// See tryAdjustICmpImmAndPred for valid transformations. 
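/// Illustrative example (not from this patch): `x slt 4097` cannot use the
/// arithmetic-immediate form, since 4097 does not fit the 12-bit, optionally
/// shifted-by-12 encoding, but rewriting it as `x sle 4096` does, because
/// 4096 == 1 << 12 is encodable. tryAdjustICmpImmAndPred performs this kind of
/// predicate-plus-constant adjustment when the original constant is not
/// directly encodable but the adjusted one is.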
-bool matchAdjustICmpImmAndPred( - MachineInstr &MI, const MachineRegisterInfo &MRI, - std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ICMP); - Register RHS = MI.getOperand(3).getReg(); - auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { - MatchInfo = *MaybeNewImmAndPred; - return true; - } - return false; -} - -bool applyAdjustICmpImmAndPred( - MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, - MachineIRBuilder &MIB, GISelChangeObserver &Observer) { - MIB.setInstrAndDebugLoc(MI); - MachineOperand &RHS = MI.getOperand(3); - MachineRegisterInfo &MRI = *MIB.getMRI(); - auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), - MatchInfo.first); - Observer.changingInstr(MI); - RHS.setReg(Cst->getOperand(0).getReg()); - MI.getOperand(1).setPredicate(MatchInfo.second); - Observer.changedInstr(MI); - return true; -} - -bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Src1Reg = MI.getOperand(1).getReg(); - const LLT SrcTy = MRI.getType(Src1Reg); - const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto LaneIdx = getSplatIndex(MI); - if (!LaneIdx) - return false; - - // The lane idx should be within the first source vector. - if (*LaneIdx >= SrcTy.getNumElements()) - return false; - - if (DstTy != SrcTy) - return false; - - LLT ScalarTy = SrcTy.getElementType(); - unsigned ScalarSize = ScalarTy.getSizeInBits(); - - unsigned Opc = 0; - switch (SrcTy.getNumElements()) { - case 2: - if (ScalarSize == 64) - Opc = AArch64::G_DUPLANE64; - break; - case 4: - if (ScalarSize == 32) - Opc = AArch64::G_DUPLANE32; - break; - case 8: - if (ScalarSize == 16) - Opc = AArch64::G_DUPLANE16; - break; - case 16: - if (ScalarSize == 8) - Opc = AArch64::G_DUPLANE8; - break; - default: - break; - } - if (!Opc) - return false; - - MatchInfo.first = Opc; - MatchInfo.second = *LaneIdx; - return true; -} - -bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - B.setInstrAndDebugLoc(MI); - auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); - B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, - {MI.getOperand(1).getReg(), Lane}); - MI.eraseFromParent(); - return true; -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H - -class AArch64PostLegalizerLoweringInfo : public CombinerInfo { -public: - AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; - - AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) - : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, - MinSize) { - if (!GeneratedRuleCfg.parseCommandLineOption()) - report_fatal_error("Invalid rule identifier"); - } - - virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, - MachineIRBuilder &B) const override; -}; - -bool 
AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, - MachineInstr &MI, - MachineIRBuilder &B) const { - CombinerHelper Helper(Observer, B); - AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); - return Generated.tryCombineAll(Observer, MI, B, Helper); -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP - -class AArch64PostLegalizerLowering : public MachineFunctionPass { -public: - static char ID; - - AArch64PostLegalizerLowering(); - - StringRef getPassName() const override { - return "AArch64PostLegalizerLowering"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; -} // end anonymous namespace - -void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() - : MachineFunctionPass(ID) { - initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); -} - -bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Legalized) && - "Expected a legalized function?"); - auto *TPC = &getAnalysis<TargetPassConfig>(); - const Function &F = MF.getFunction(); - AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); -} - -char AArch64PostLegalizerLowering::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostLegalizerLowering() { - return new AArch64PostLegalizerLowering(); -} -} // end namespace llvm +//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Post-legalization lowering for instructions. +/// +/// This is used to offload pattern matching from the selector. +/// +/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually +/// a G_ZIP, G_UZP, etc. +/// +/// General optimization combines should be handled by either the +/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. 
+/// +//===----------------------------------------------------------------------===// + +#include "AArch64TargetMachine.h" +#include "AArch64GlobalISelUtils.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-postlegalizer-lowering" + +using namespace llvm; +using namespace MIPatternMatch; +using namespace AArch64GISelUtils; + +/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. +/// +/// Used for matching target-supported shuffles before codegen. +struct ShuffleVectorPseudo { + unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) + Register Dst; ///< Destination register. + SmallVector<SrcOp, 2> SrcOps; ///< Source registers. + ShuffleVectorPseudo(unsigned Opc, Register Dst, + std::initializer_list<SrcOp> SrcOps) + : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; + ShuffleVectorPseudo() {} +}; + +/// Check if a vector shuffle corresponds to a REV instruction with the +/// specified blocksize. +static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, + unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); + + unsigned BlockElts = M[0] + 1; + + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSize; + + if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + // Ignore undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != + (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. +/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + +/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector +/// sources of the shuffle are different. +static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, + unsigned NumElts) { + // Look for the first non-undef element. + auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); + if (FirstRealElt == M.end()) + return None; + + // Use APInt to handle overflow when calculating expected element. + unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); + APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); + + // The following shuffle indices must be the successive elements after the + // first real element. 
+ if (any_of( + make_range(std::next(FirstRealElt), M.end()), + [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) + return None; + + // The index of an EXT is the first element if it is not UNDEF. + // Watch out for the beginning UNDEFs. The EXT index should be the expected + // value of the first element. E.g. + // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. + // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. + // ExpectedElt is the last mask index plus 1. + uint64_t Imm = ExpectedElt.getZExtValue(); + bool ReverseExt = false; + + // There are two difference cases requiring to reverse input vectors. + // For example, for vector <4 x i32> we have the following cases, + // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) + // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) + // For both cases, we finally use mask <5, 6, 7, 0>, which requires + // to reverse two input vectors. + if (Imm < NumElts) + ReverseExt = true; + else + Imm -= NumElts; + return std::make_pair(ReverseExt, Imm); +} + +/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. +/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. +static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + // Skip undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) + return false; + } + return true; +} + +/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. +/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. +static bool isZipMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + + // 0 means use ZIP1, 1 means use ZIP2. + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || + (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) + return false; + Idx += 1; + } + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a +/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. +static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned EltSize = Ty.getScalarSizeInBits(); + + // Element size for a rev cannot be 64. + if (EltSize == 64) + return false; + + unsigned NumElts = Ty.getNumElements(); + + // Try to produce G_REV64 + if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { + MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); + return true; + } + + // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. + // This should be identical to above, but with a constant 32 and constant + // 16. + return false; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. 
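The mask predicates above (isREVMask, isTRNMask, isUZPMask, isZipMask) are pure index arithmetic, so they can be exercised outside of LLVM. The following is a standalone sketch that mirrors two of them over plain std::vector<int> masks, with -1 standing for an undef lane; the function names and the sample masks are invented for the illustration and are not part of the patch.

// Standalone illustration of the shuffle-mask predicates shown above.
// -1 marks an undef lane, as in a G_SHUFFLE_VECTOR mask.
#include <cstdio>
#include <vector>

// Mirrors isZipMask: even/odd positions read lane Idx and lane Idx + NumElts.
static bool isZipMaskSketch(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
    ++Idx;
  }
  return true;
}

// Mirrors isUZPMask: lane i must read element 2*i + WhichResult.
static bool isUZPMaskSketch(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // undef lane, be optimistic
    if ((unsigned)M[i] != 2 * i + WhichResult)
      return false;
  }
  return true;
}

int main() {
  unsigned Which = 0;
  std::vector<int> Zip1 = {0, 4, 1, 5}; // interleave of two 4-element inputs
  bool IsZip = isZipMaskSketch(Zip1, 4, Which);
  std::printf("zip mask: %d (which=%u)\n", IsZip, Which);   // 1, ZIP1

  std::vector<int> Uzp2 = {1, 3, 5, 7}; // odd lanes of the concatenation
  bool IsUzp = isUZPMaskSketch(Uzp2, 4, Which);
  std::printf("uzp mask: %d (which=%u)\n", IsUzp, Which);   // 1, UZP2
  return 0;
}

The matchers below (matchTRN, matchUZP, matchZip) then only translate the accepted mask and WhichResult into the corresponding AArch64 pseudo opcode.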
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_UZP1 or G_UZP2 instruction. +/// +/// \param [in] MI - The shuffle vector instruction. +/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. +static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isZipMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// Helper function for matchDup. +static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + if (Lane != 0) + return false; + + // Try to match a vector splat operation into a dup instruction. + // We're looking for this pattern: + // + // %scalar:gpr(s64) = COPY $x0 + // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF + // %cst0:gpr(s32) = G_CONSTANT i32 0 + // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) + // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) + // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) + // + // ...into: + // %splat = G_DUP %scalar + + // Begin matching the insert. + auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, + MI.getOperand(1).getReg(), MRI); + if (!InsMI) + return false; + // Match the undef vector operand. + if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), + MRI)) + return false; + + // Match the index constant 0. + if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) + return false; + + MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), + {InsMI->getOperand(2).getReg()}); + return true; +} + +/// Helper function for matchDup. 
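getExtMask above reduces an EXT-style shuffle to a rotation of the two concatenated inputs: find the first defined lane, require every following defined lane to be its successor, then derive the rotate index and whether the inputs must be swapped. The sketch below restates that arithmetic with plain unsigned modulo math in place of APInt; the helper name is invented, and matchEXT further down additionally scales the index by the element size in bytes.

// Standalone sketch of the EXT-mask computation used by getExtMask above.
#include <cstdio>
#include <vector>

static bool getExtMaskSketch(const std::vector<int> &M, unsigned NumElts,
                             bool &ReverseExt, unsigned &Imm) {
  // Find the first non-undef lane.
  unsigned First = 0;
  while (First < M.size() && M[First] < 0)
    ++First;
  if (First == M.size())
    return false;

  // Every later defined lane must continue the sequence, modulo 2 * NumElts
  // (the two source vectors concatenated). This mirrors the any_of loop.
  unsigned Expected = (unsigned)M[First] + 1;
  for (unsigned i = First + 1; i < M.size(); ++i) {
    if (M[i] >= 0 && (unsigned)M[i] != Expected % (2 * NumElts))
      return false;
    ++Expected;
  }

  // As in the original: the value "last mask index + 1" becomes the EXT
  // index; if it lands in the first input, the two inputs are swapped.
  Imm = Expected % (2 * NumElts);
  ReverseExt = false;
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return true;
}

int main() {
  bool Rev = false;
  unsigned Imm = 0;
  // <-1, -1, 7, 0> on a <4 x s32> shuffle: EXT of the swapped inputs, index 1.
  std::vector<int> M = {-1, -1, 7, 0};
  if (getExtMaskSketch(M, 4, Rev, Imm))
    std::printf("reverse=%d imm=%u (bytes for s32: %u)\n", Rev, Imm, Imm * 4u);
  return 0;
}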
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(Lane >= 0 && "Expected positive lane?"); + // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the + // lane's definition directly. + auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, + MI.getOperand(1).getReg(), MRI); + if (!BuildVecMI) + return false; + Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); + MatchInfo = + ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); + return true; +} + +static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + auto MaybeLane = getSplatIndex(MI); + if (!MaybeLane) + return false; + int Lane = *MaybeLane; + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane < 0) + Lane = 0; + if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) + return true; + if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) + return true; + return false; +} + +static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Dst = MI.getOperand(0).getReg(); + auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), + MRI.getType(Dst).getNumElements()); + if (!ExtInfo) + return false; + bool ReverseExt; + uint64_t Imm; + std::tie(ReverseExt, Imm) = *ExtInfo; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + if (ReverseExt) + std::swap(V1, V2); + uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; + Imm *= ExtFactor; + MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. +/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. +static bool applyShuffleVectorPseudo(MachineInstr &MI, + ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); + MI.eraseFromParent(); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. +/// Special-cased because the constant operand must be emitted as a G_CONSTANT +/// for the imported tablegen patterns to work. +static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + // Tablegen patterns expect an i32 G_CONSTANT as the final op. + auto Cst = + MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, + {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); + MI.eraseFromParent(); + return true; +} + +/// isVShiftRImm - Check if this is a valid vector for the immediate +/// operand of a vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits for a right shift. +static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, + int64_t &Cnt) { + assert(Ty.isVector() && "vector shift count is not a vector type"); + MachineInstr *MI = MRI.getVRegDef(Reg); + auto Cst = getBuildVectorConstantSplat(*MI, MRI); + if (!Cst) + return false; + Cnt = *Cst; + int64_t ElementBits = Ty.getScalarSizeInBits(); + return Cnt >= 1 && Cnt <= ElementBits; +} + +/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. 
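isVShiftRImm above only accepts a shift amount that is a constant splat whose value lies in the inclusive range [1, ElementBits]; matchVAshrLshrImm below then rewrites the generic shift into G_VASHR/G_VLSHR with that immediate. A minimal standalone sketch of those two checks follows; the splat detection is a simplified stand-in for getBuildVectorConstantSplat (the real helper also looks through copies and undef lanes), and all names are invented for the illustration.

// Standalone sketch of the checks behind isVShiftRImm above.
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for getBuildVectorConstantSplat: all lanes equal.
static bool getConstantSplatSketch(const std::vector<int64_t> &Lanes,
                                   int64_t &Out) {
  if (Lanes.empty())
    return false;
  for (int64_t L : Lanes)
    if (L != Lanes.front())
      return false;
  Out = Lanes.front();
  return true;
}

// A vector shift-right immediate must satisfy 1 <= Cnt <= ElementBits.
static bool isVShiftRImmSketch(const std::vector<int64_t> &Lanes,
                               unsigned ElementBits, int64_t &Cnt) {
  if (!getConstantSplatSketch(Lanes, Cnt))
    return false;
  return Cnt >= 1 && Cnt <= (int64_t)ElementBits;
}

int main() {
  int64_t Cnt = 0;
  // <4 x s32> shifted right by a splat of 31: valid immediate form.
  std::printf("%d\n", isVShiftRImmSketch({31, 31, 31, 31}, 32, Cnt)); // 1
  // A splat of 0 (or a non-splat amount) is rejected.
  std::printf("%d\n", isVShiftRImmSketch({0, 0, 0, 0}, 32, Cnt));     // 0
  return 0;
}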
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR || + MI.getOpcode() == TargetOpcode::G_LSHR); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (!Ty.isVector()) + return false; + return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); +} + +static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); + unsigned NewOpc = + Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; + MachineIRBuilder MIB(MI); + auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MI.eraseFromParent(); + return true; +} + +/// Determine if it is possible to modify the \p RHS and predicate \p P of a +/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. +/// +/// \returns A pair containing the updated immediate and predicate which may +/// be used to optimize the instruction. +/// +/// \note This assumes that the comparison has been legalized. +Optional<std::pair<uint64_t, CmpInst::Predicate>> +tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, + const MachineRegisterInfo &MRI) { + const auto &Ty = MRI.getType(RHS); + if (Ty.isVector()) + return None; + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); + + // If the RHS is not a constant, or the RHS is already a valid arithmetic + // immediate, then there is nothing to change. + auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); + if (!ValAndVReg) + return None; + uint64_t C = ValAndVReg->Value.getZExtValue(); + if (isLegalArithImmed(C)) + return None; + + // We have a non-arithmetic immediate. Check if adjusting the immediate and + // adjusting the predicate will result in a legal arithmetic immediate. + switch (P) { + default: + return None; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + // Check for + // + // x slt c => x sle c - 1 + // x sge c => x sgt c - 1 + // + // When c is not the smallest possible negative number. + if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || + (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) + return None; + P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; + C -= 1; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_UGE: + // Check for + // + // x ult c => x ule c - 1 + // x uge c => x ugt c - 1 + // + // When c is not zero. + if (C == 0) + return None; + P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; + C -= 1; + break; + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGT: + // Check for + // + // x sle c => x slt c + 1 + // x sgt c => s sge c + 1 + // + // When c is not the largest possible signed integer. + if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || + (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) + return None; + P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; + C += 1; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGT: + // Check for + // + // x ule c => x ult c + 1 + // x ugt c => s uge c + 1 + // + // When c is not the largest possible unsigned integer. + if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || + (Size == 64 && C == UINT64_MAX)) + return None; + P = (P == CmpInst::ICMP_ULE) ? 
CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; + C += 1; + break; + } + + // Check if the new constant is valid, and return the updated constant and + // predicate if it is. + if (Size == 32) + C = static_cast<uint32_t>(C); + if (!isLegalArithImmed(C)) + return None; + return {{C, P}}; +} + +/// Determine whether or not it is possible to update the RHS and predicate of +/// a G_ICMP instruction such that the RHS will be selected as an arithmetic +/// immediate. +/// +/// \p MI - The G_ICMP instruction +/// \p MatchInfo - The new RHS immediate and predicate on success +/// +/// See tryAdjustICmpImmAndPred for valid transformations. +bool matchAdjustICmpImmAndPred( + MachineInstr &MI, const MachineRegisterInfo &MRI, + std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + Register RHS = MI.getOperand(3).getReg(); + auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { + MatchInfo = *MaybeNewImmAndPred; + return true; + } + return false; +} + +bool applyAdjustICmpImmAndPred( + MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, + MachineIRBuilder &MIB, GISelChangeObserver &Observer) { + MIB.setInstrAndDebugLoc(MI); + MachineOperand &RHS = MI.getOperand(3); + MachineRegisterInfo &MRI = *MIB.getMRI(); + auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), + MatchInfo.first); + Observer.changingInstr(MI); + RHS.setReg(Cst->getOperand(0).getReg()); + MI.getOperand(1).setPredicate(MatchInfo.second); + Observer.changedInstr(MI); + return true; +} + +bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Src1Reg = MI.getOperand(1).getReg(); + const LLT SrcTy = MRI.getType(Src1Reg); + const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto LaneIdx = getSplatIndex(MI); + if (!LaneIdx) + return false; + + // The lane idx should be within the first source vector. 
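tryAdjustICmpImmAndPred above trades the predicate for a neighbouring immediate so that the RHS becomes a legal AArch64 arithmetic immediate. The sketch below works through the signed-less-than case only, with a legality rule assumed to be the usual AArch64 arithmetic-immediate form (a 12-bit value, optionally shifted left by 12); the helper names are invented, and the real code also handles the unsigned and greater-than variants plus 32-bit truncation.

// Standalone sketch: rewrite "x slt C" as "x sle C-1" when that makes the
// immediate encodable. 64-bit comparison assumed.
#include <cstdint>
#include <cstdio>

// Assumed encodability rule: 12-bit immediate, optionally shifted left by 12.
static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 24) == 0);
}

// Returns true and updates C / UseSLE when the rewrite applies.
static bool adjustSltImm(uint64_t &C, bool &UseSLE) {
  if (isLegalArithImmedSketch(C))
    return false;                 // already legal, nothing to do
  if ((int64_t)C == INT64_MIN)
    return false;                 // C - 1 would wrap around
  uint64_t NewC = C - 1;
  if (!isLegalArithImmedSketch(NewC))
    return false;                 // adjusting did not help
  C = NewC;
  UseSLE = true;                  // predicate becomes signed <=
  return true;
}

int main() {
  bool UseSLE = false;
  uint64_t C = 0x1001;            // not encodable: > 4095 with low bits set
  if (adjustSltImm(C, UseSLE))
    std::printf("use sle with imm 0x%llx\n", (unsigned long long)C); // 0x1000
  return 0;
}

The match/apply pair that follows simply stores the adjusted (immediate, predicate) pair and then rebuilds the G_ICMP's RHS as a fresh G_CONSTANT.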
+ if (*LaneIdx >= SrcTy.getNumElements()) + return false; + + if (DstTy != SrcTy) + return false; + + LLT ScalarTy = SrcTy.getElementType(); + unsigned ScalarSize = ScalarTy.getSizeInBits(); + + unsigned Opc = 0; + switch (SrcTy.getNumElements()) { + case 2: + if (ScalarSize == 64) + Opc = AArch64::G_DUPLANE64; + break; + case 4: + if (ScalarSize == 32) + Opc = AArch64::G_DUPLANE32; + break; + case 8: + if (ScalarSize == 16) + Opc = AArch64::G_DUPLANE16; + break; + case 16: + if (ScalarSize == 8) + Opc = AArch64::G_DUPLANE8; + break; + default: + break; + } + if (!Opc) + return false; + + MatchInfo.first = Opc; + MatchInfo.second = *LaneIdx; + return true; +} + +bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + B.setInstrAndDebugLoc(MI); + auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); + B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, + {MI.getOperand(1).getReg(), Lane}); + MI.eraseFromParent(); + return true; +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H + +class AArch64PostLegalizerLoweringInfo : public CombinerInfo { +public: + AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; + + AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, + MinSize) { + if (!GeneratedRuleCfg.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + CombinerHelper Helper(Observer, B); + AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); + return Generated.tryCombineAll(Observer, MI, B, Helper); +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP + +class AArch64PostLegalizerLowering : public MachineFunctionPass { +public: + static char ID; + + AArch64PostLegalizerLowering(); + + StringRef getPassName() const override { + return "AArch64PostLegalizerLowering"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() + : MachineFunctionPass(ID) { + initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); +} + +bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + 
MachineFunctionProperties::Property::Legalized) && + "Expected a legalized function?"); + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); +} + +char AArch64PostLegalizerLowering::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostLegalizerLowering() { + return new AArch64PostLegalizerLowering(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp index 2f882ecb1f..00436b5924 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -1,187 +1,187 @@ -//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass does post-instruction-selection optimizations in the GlobalISel -// pipeline, before the rest of codegen runs. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-post-select-optimize" - -using namespace llvm; - -namespace { -class AArch64PostSelectOptimize : public MachineFunctionPass { -public: - static char ID; - - AArch64PostSelectOptimize(); - - StringRef getPassName() const override { - return "AArch64 Post Select Optimizer"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - -private: - bool optimizeNZCVDefs(MachineBasicBlock &MBB); -}; -} // end anonymous namespace - -void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostSelectOptimize::AArch64PostSelectOptimize() - : MachineFunctionPass(ID) { - initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); -} - -unsigned getNonFlagSettingVariant(unsigned Opc) { - switch (Opc) { - default: - return 0; - case AArch64::SUBSXrr: - return AArch64::SUBXrr; - case AArch64::SUBSWrr: - return AArch64::SUBWrr; - case AArch64::SUBSXrs: - return AArch64::SUBXrs; - case AArch64::SUBSXri: - return AArch64::SUBXri; - case AArch64::SUBSWri: - return AArch64::SUBWri; - } -} - -bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { - // Consider the following code: - // FCMPSrr %0, %1, 
implicit-def $nzcv - // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv - // FCMPSrr %0, %1, implicit-def $nzcv - // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // This kind of code where we have 2 FCMPs each feeding a CSEL can happen - // when we have a single IR fcmp being used by two selects. During selection, - // to ensure that there can be no clobbering of nzcv between the fcmp and the - // csel, we have to generate an fcmp immediately before each csel is - // selected. - // However, often we can essentially CSE these together later in MachineCSE. - // This doesn't work though if there are unrelated flag-setting instructions - // in between the two FCMPs. In this case, the SUBS defines NZCV - // but it doesn't have any users, being overwritten by the second FCMP. - // - // Our solution here is to try to convert flag setting operations between - // a interval of identical FCMPs, so that CSE will be able to eliminate one. - bool Changed = false; - const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - - // The first step is to find the first and last FCMPs. If we have found - // at least two, then set the limit of the bottom-up walk to the first FCMP - // found since we're only interested in dealing with instructions between - // them. - MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; - for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { - if (MI.getOpcode() == AArch64::FCMPSrr || - MI.getOpcode() == AArch64::FCMPDrr) { - if (!FirstCmp) - FirstCmp = &MI; - else - LastCmp = &MI; - } - } - - // In addition to converting flag-setting ops in fcmp ranges into non-flag - // setting ops, across the whole basic block we also detect when nzcv - // implicit-defs are dead, and mark them as dead. Peephole optimizations need - // this information later. - - LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); - LRU.addLiveOuts(MBB); - bool NZCVDead = LRU.available(AArch64::NZCV); - bool InsideCmpRange = false; - for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { - LRU.stepBackward(II); - - if (LastCmp) { // There's a range present in this block. - // If we're inside an fcmp range, look for begin instruction. - if (InsideCmpRange && &II == FirstCmp) - InsideCmpRange = false; - else if (&II == LastCmp) - InsideCmpRange = true; - } - - // Did this instruction define NZCV? - bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); - if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { - // If we have a def and NZCV is dead, then we may convert this op. - unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); - int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); - if (DeadNZCVIdx != -1) { - // If we're inside an fcmp range, then convert flag setting ops. - if (InsideCmpRange && NewOpc) { - LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " - "op in fcmp range: " - << II); - II.setDesc(TII->get(NewOpc)); - II.RemoveOperand(DeadNZCVIdx); - Changed |= true; - } else { - // Otherwise, we just set the nzcv imp-def operand to be dead, so the - // peephole optimizations can optimize them further. 
- II.getOperand(DeadNZCVIdx).setIsDead(); - } - } - } - - NZCVDead = NZCVDeadAtCurrInstr; - } - return Changed; -} - -bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Selected) && - "Expected a selected MF"); - - bool Changed = false; - for (auto &BB : MF) - Changed |= optimizeNZCVDefs(BB); - return true; -} - -char AArch64PostSelectOptimize::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", - false, false) -INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostSelectOptimize() { - return new AArch64PostSelectOptimize(); -} -} // end namespace llvm +//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does post-instruction-selection optimizations in the GlobalISel +// pipeline, before the rest of codegen runs. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-post-select-optimize" + +using namespace llvm; + +namespace { +class AArch64PostSelectOptimize : public MachineFunctionPass { +public: + static char ID; + + AArch64PostSelectOptimize(); + + StringRef getPassName() const override { + return "AArch64 Post Select Optimizer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool optimizeNZCVDefs(MachineBasicBlock &MBB); +}; +} // end anonymous namespace + +void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostSelectOptimize::AArch64PostSelectOptimize() + : MachineFunctionPass(ID) { + initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); +} + +unsigned getNonFlagSettingVariant(unsigned Opc) { + switch (Opc) { + default: + return 0; + case AArch64::SUBSXrr: + return AArch64::SUBXrr; + case AArch64::SUBSWrr: + return AArch64::SUBWrr; + case AArch64::SUBSXrs: + return AArch64::SUBXrs; + case AArch64::SUBSXri: + return AArch64::SUBXri; + case AArch64::SUBSWri: + return AArch64::SUBWri; + } +} + +bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { + // Consider the following code: + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // This kind of code where we have 2 FCMPs each feeding a CSEL 
can happen + // when we have a single IR fcmp being used by two selects. During selection, + // to ensure that there can be no clobbering of nzcv between the fcmp and the + // csel, we have to generate an fcmp immediately before each csel is + // selected. + // However, often we can essentially CSE these together later in MachineCSE. + // This doesn't work though if there are unrelated flag-setting instructions + // in between the two FCMPs. In this case, the SUBS defines NZCV + // but it doesn't have any users, being overwritten by the second FCMP. + // + // Our solution here is to try to convert flag setting operations between + // a interval of identical FCMPs, so that CSE will be able to eliminate one. + bool Changed = false; + const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + + // The first step is to find the first and last FCMPs. If we have found + // at least two, then set the limit of the bottom-up walk to the first FCMP + // found since we're only interested in dealing with instructions between + // them. + MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; + for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { + if (MI.getOpcode() == AArch64::FCMPSrr || + MI.getOpcode() == AArch64::FCMPDrr) { + if (!FirstCmp) + FirstCmp = &MI; + else + LastCmp = &MI; + } + } + + // In addition to converting flag-setting ops in fcmp ranges into non-flag + // setting ops, across the whole basic block we also detect when nzcv + // implicit-defs are dead, and mark them as dead. Peephole optimizations need + // this information later. + + LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); + LRU.addLiveOuts(MBB); + bool NZCVDead = LRU.available(AArch64::NZCV); + bool InsideCmpRange = false; + for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + LRU.stepBackward(II); + + if (LastCmp) { // There's a range present in this block. + // If we're inside an fcmp range, look for begin instruction. + if (InsideCmpRange && &II == FirstCmp) + InsideCmpRange = false; + else if (&II == LastCmp) + InsideCmpRange = true; + } + + // Did this instruction define NZCV? + bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); + if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { + // If we have a def and NZCV is dead, then we may convert this op. + unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); + int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); + if (DeadNZCVIdx != -1) { + // If we're inside an fcmp range, then convert flag setting ops. + if (InsideCmpRange && NewOpc) { + LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " + "op in fcmp range: " + << II); + II.setDesc(TII->get(NewOpc)); + II.RemoveOperand(DeadNZCVIdx); + Changed |= true; + } else { + // Otherwise, we just set the nzcv imp-def operand to be dead, so the + // peephole optimizations can optimize them further. 
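The comment above describes the whole algorithm: walk the block bottom-up with flag liveness, and between two identical FCMPs convert flag-setting instructions whose NZCV def is dead into their non-flag-setting twins so MachineCSE can later merge the compares. Below is a toy model of that walk over a list of abstract instructions; it uses no LLVM types, and the instruction records and their fields are invented purely for the illustration.

// Toy model of the bottom-up NZCV walk described above.
#include <cstdio>
#include <string>
#include <vector>

struct Inst {
  std::string Name;
  bool DefsNZCV;  // writes the flags
  bool ReadsNZCV; // consumes the flags
  bool IsFCmp;    // one of the identical compares
  bool HasTwin;   // has a non-flag-setting variant (e.g. SUBS -> SUB)
};

int main() {
  std::vector<Inst> Block = {
      {"FCMPSrr",   true,  false, true,  false},
      {"CSELWr #1", false, true,  false, false},
      {"SUBSWrr",   true,  false, false, true},  // its flags are never read
      {"FCMPSrr",   true,  false, true,  false},
      {"CSELWr #2", false, true,  false, false},
  };

  // Find the first and last compare, as the pass does.
  int First = -1, Last = -1;
  for (int i = 0; i < (int)Block.size(); ++i)
    if (Block[i].IsFCmp) {
      if (First < 0) First = i; else Last = i;
    }

  // Bottom-up: a read makes NZCV live, a def makes it dead again. A def of
  // dead flags inside the compare range can use the non-flag-setting twin.
  bool NZCVLive = false;
  for (int i = (int)Block.size() - 1; i >= 0; --i) {
    Inst &I = Block[i];
    bool InsideRange = Last >= 0 && i > First && i < Last;
    if (I.DefsNZCV && !NZCVLive && I.HasTwin && InsideRange)
      std::printf("convert %s to its non-flag-setting form\n", I.Name.c_str());
    if (I.DefsNZCV)
      NZCVLive = false;
    if (I.ReadsNZCV)
      NZCVLive = true;
  }
  return 0;
}

Run on the example from the comment, this reports exactly one conversion, the SUBSWrr sitting between the two FCMPs.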
+ II.getOperand(DeadNZCVIdx).setIsDead(); + } + } + } + + NZCVDead = NZCVDeadAtCurrInstr; + } + return Changed; +} + +bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected) && + "Expected a selected MF"); + + bool Changed = false; + for (auto &BB : MF) + Changed |= optimizeNZCVDefs(BB); + return true; +} + +char AArch64PostSelectOptimize::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", + false, false) +INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostSelectOptimize() { + return new AArch64PostSelectOptimize(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 5f9b64e274..2686f6dc46 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -104,16 +104,16 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other - // heuristics decide. - unsigned MaxLen = EnableOpt ? 0 : 32; - // Try to inline memcpy type calls if optimizations are enabled. - return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; - } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other + // heuristics decide. + unsigned MaxLen = EnableOpt ? 0 : 32; + // Try to inline memcpy type calls if optimizations are enabled. + return !EnableMinSize ? 
Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; } + } return false; } @@ -188,7 +188,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, namespace llvm { -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { return new AArch64PreLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index c76c43389b..e26fe60d93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -13,7 +13,7 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -466,10 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping( getValueMapping(RBIdx, Size), NumOperands); } -bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { unsigned Op = MI.getOpcode(); // Do we have an explicit floating point instruction? @@ -481,30 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, if (Op != TargetOpcode::COPY && !MI.isPHI()) return false; - // Check if we already know the register bank. - auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); - if (RB == &AArch64::FPRRegBank) - return true; - if (RB == &AArch64::GPRRegBank) - return false; - - // We don't know anything. - // - // If we have a phi, we may be able to infer that it will be assigned a FPR - // based off of its inputs. - if (!MI.isPHI() || Depth > MaxFPRSearchDepth) - return false; - - return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { - return Op.isReg() && - onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); - }); + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &AArch64::FPRRegBank) + return true; + if (RB == &AArch64::GPRRegBank) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned a FPR + // based off of its inputs. 
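The hasFPConstraints change above adds a depth-bounded look-through: if the destination's register bank is still unknown and the instruction is a PHI, the query recurses into the PHI's inputs up to MaxFPRSearchDepth and prefers FPR as soon as one input is FP-only. The following toy restates that bounded recursion over a hypothetical def graph; the Node type and its fields are invented for the sketch and do not model real register banks.

// Toy model of the depth-bounded FP-constraint inference above.
#include <cstdio>
#include <vector>

struct Node {
  bool IsExplicitFPOp;             // e.g. an FP arithmetic def: definitely FPR
  bool IsPhi;                      // bank decided by the inputs
  std::vector<const Node *> Inputs;
};

static const unsigned MaxFPRSearchDepth = 2; // same bound as the real code

static bool onlyDefinesFPSketch(const Node &N, unsigned Depth = 0) {
  if (N.IsExplicitFPOp)
    return true;
  if (!N.IsPhi || Depth > MaxFPRSearchDepth)
    return false; // unknown: stay conservative
  for (const Node *In : N.Inputs)
    if (In && onlyDefinesFPSketch(*In, Depth + 1))
      return true; // one FP input is enough to prefer FPR
  return false;
}

int main() {
  Node FAdd{true, false, {}};            // FP-producing def
  Node Load{false, false, {}};           // nothing known from the opcode alone
  Node Phi{false, true, {&FAdd, &Load}}; // PHI of the two
  std::printf("phi prefers FPR: %d\n", onlyDefinesFPSketch(Phi)); // 1
  return 0;
}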
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); } bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: @@ -513,13 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } -bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: case TargetOpcode::G_SITOFP: @@ -530,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } const RegisterBankInfo::InstructionMapping & @@ -680,18 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case TargetOpcode::G_SITOFP: - case TargetOpcode::G_UITOFP: { + case TargetOpcode::G_UITOFP: { if (MRI.getType(MI.getOperand(0).getReg()).isVector()) break; - // Integer to FP conversions don't necessarily happen between GPR -> FPR - // regbanks. They can also be done within an FPR register. - Register SrcReg = MI.getOperand(1).getReg(); - if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; + // Integer to FP conversions don't necessarily happen between GPR -> FPR + // regbanks. They can also be done within an FPR register. + Register SrcReg = MI.getOperand(1).getReg(); + if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; break; - } + } case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -729,8 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // assume this was a floating point load in the IR. // If it was not, we would have had a bitcast before // reaching that instruction. - // Int->FP conversion operations are also captured in onlyDefinesFP(). - if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { + // Int->FP conversion operations are also captured in onlyDefinesFP(). + if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { OpRegBankIdx[0] = PMI_FirstFPR; break; } @@ -853,7 +853,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_BUILD_VECTOR: { + case TargetOpcode::G_BUILD_VECTOR: { // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. if (OpRegBankIdx[1] != PMI_FirstGPR) @@ -864,17 +864,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Get the instruction that defined the source operand reg, and check if // it's a floating point operation. 
Or, if it's a type like s16 which - // doesn't have a exact size gpr register class. The exception is if the - // build_vector has all constant operands, which may be better to leave as - // gpr without copies, so it can be matched in imported patterns. + // doesn't have a exact size gpr register class. The exception is if the + // build_vector has all constant operands, which may be better to leave as + // gpr without copies, so it can be matched in imported patterns. MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned DefOpc = DefMI->getOpcode(); const LLT SrcTy = MRI.getType(VReg); - if (all_of(MI.operands(), [&](const MachineOperand &Op) { - return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == - TargetOpcode::G_CONSTANT; - })) - break; + if (all_of(MI.operands(), [&](const MachineOperand &Op) { + return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == + TargetOpcode::G_CONSTANT; + })) + break; if (isPreISelGenericFloatingPointOpcode(DefOpc) || SrcTy.getSizeInBits() < 32) { // Have a floating point op. @@ -885,30 +885,30 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_FMUL: - case TargetOpcode::G_VECREDUCE_FMAX: - case TargetOpcode::G_VECREDUCE_FMIN: - case TargetOpcode::G_VECREDUCE_ADD: - case TargetOpcode::G_VECREDUCE_MUL: - case TargetOpcode::G_VECREDUCE_AND: - case TargetOpcode::G_VECREDUCE_OR: - case TargetOpcode::G_VECREDUCE_XOR: - case TargetOpcode::G_VECREDUCE_SMAX: - case TargetOpcode::G_VECREDUCE_SMIN: - case TargetOpcode::G_VECREDUCE_UMAX: - case TargetOpcode::G_VECREDUCE_UMIN: - // Reductions produce a scalar value from a vector, the scalar should be on - // FPR bank. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - break; - case TargetOpcode::G_VECREDUCE_SEQ_FADD: - case TargetOpcode::G_VECREDUCE_SEQ_FMUL: - // These reductions also take a scalar accumulator input. - // Assign them FPR for now. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; - break; - } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: + // Reductions produce a scalar value from a vector, the scalar should be on + // FPR bank. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: + // These reductions also take a scalar accumulator input. + // Assign them FPR for now. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; + break; + } // Finally construct the computed mapping. 
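The G_BUILD_VECTOR hunk above keeps an all-constant build vector on GPR (so the imported patterns can still match it) and only moves the sources to FPR when the first source is produced by a floating-point op or has a scalar type with no exact-size GPR class. A compact standalone restatement of that decision follows; the function name and the SourceKind enum are invented for the sketch.

// Standalone restatement of the G_BUILD_VECTOR bank decision above.
#include <cstdio>
#include <vector>

enum class SourceKind { Constant, FPOp, IntOp };

// Returns true if the build_vector's scalar sources should be put on FPR.
static bool buildVectorWantsFPR(const std::vector<SourceKind> &Srcs,
                                unsigned ScalarSizeInBits,
                                bool FirstSrcAlreadyFPR) {
  if (FirstSrcAlreadyFPR)
    return true; // preserve an existing FPR assignment
  // All-constant vectors stay on GPR so imported patterns can match them.
  bool AllConstant = true;
  for (SourceKind K : Srcs)
    if (K != SourceKind::Constant)
      AllConstant = false;
  if (AllConstant)
    return false;
  // FP-producing sources, or scalars with no exact-size GPR class (e.g. s16),
  // push the whole build_vector onto FPR.
  return Srcs.front() == SourceKind::FPOp || ScalarSizeInBits < 32;
}

int main() {
  std::printf("%d\n", buildVectorWantsFPR({SourceKind::Constant,
                                           SourceKind::Constant}, 32, false)); // 0
  std::printf("%d\n", buildVectorWantsFPR({SourceKind::FPOp,
                                           SourceKind::IntOp}, 32, false));    // 1
  return 0;
}

The vector-reduction cases added right after follow the same idea in a simpler form: a reduction always yields its scalar on the FPR bank, and the sequential FADD/FMUL variants also keep their scalar accumulator there.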
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 019017bc3e..c8cfe53299 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -114,20 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { const InstructionMapping & getSameKindOfOperandsMapping(const MachineInstr &MI) const; - /// Maximum recursion depth for hasFPConstraints. - const unsigned MaxFPRSearchDepth = 2; - - /// \returns true if \p MI only uses and defines FPRs. + /// Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only uses FPRs. + /// \returns true if \p MI only uses FPRs. bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only defines FPRs. + /// \returns true if \p MI only defines FPRs. bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); |