author    | shadchin <shadchin@yandex-team.ru>           | 2022-02-10 16:44:30 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300
commit    | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree      | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
parent    | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download  | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2534
1 file changed, 1267 insertions, 1267 deletions
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 66871ca3b9..995abb85d0 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -30,7 +30,7 @@ using namespace llvm; using namespace LegalizeActions; -using namespace MIPatternMatch; +using namespace MIPatternMatch; /// Try to break down \p OrigTy into \p NarrowTy sized pieces. /// @@ -77,8 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { return Type::getFloatTy(Ctx); case 64: return Type::getDoubleTy(Ctx); - case 80: - return Type::getX86_FP80Ty(Ctx); + case 80: + return Type::getX86_FP80Ty(Ctx); case 128: return Type::getFP128Ty(Ctx); default: @@ -90,15 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), - LI(*MF.getSubtarget().getLegalizerInfo()), - TLI(*MF.getSubtarget().getTargetLowering()) { } + LI(*MF.getSubtarget().getLegalizerInfo()), + TLI(*MF.getSubtarget().getTargetLowering()) { } LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) - : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), - TLI(*MF.getSubtarget().getTargetLowering()) { } - + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), + TLI(*MF.getSubtarget().getTargetLowering()) { } + LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); @@ -240,20 +240,20 @@ void LegalizerHelper::insertParts(Register DstReg, } } -/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. +/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
static void getUnmergeResults(SmallVectorImpl<Register> &Regs, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); - const int StartIdx = Regs.size(); + const int StartIdx = Regs.size(); const int NumResults = MI.getNumOperands() - 1; - Regs.resize(Regs.size() + NumResults); + Regs.resize(Regs.size() + NumResults); for (int I = 0; I != NumResults; ++I) - Regs[StartIdx + I] = MI.getOperand(I).getReg(); + Regs[StartIdx + I] = MI.getOperand(I).getReg(); } -void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, - LLT GCDTy, Register SrcReg) { +void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, + LLT GCDTy, Register SrcReg) { LLT SrcTy = MRI.getType(SrcReg); if (SrcTy == GCDTy) { // If the source already evenly divides the result type, we don't need to do @@ -264,13 +264,13 @@ void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); getUnmergeResults(Parts, *Unmerge); } -} +} -LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, - LLT NarrowTy, Register SrcReg) { - LLT SrcTy = MRI.getType(SrcReg); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - extractGCDType(Parts, GCDTy, SrcReg); +LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, + LLT NarrowTy, Register SrcReg) { + LLT SrcTy = MRI.getType(SrcReg); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + extractGCDType(Parts, GCDTy, SrcReg); return GCDTy; } @@ -384,14 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } if (LCMTy.isVector()) { - unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); - SmallVector<Register, 8> UnmergeDefs(NumDefs); - UnmergeDefs[0] = DstReg; - for (unsigned I = 1; I != NumDefs; ++I) - UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); - - MIRBuilder.buildUnmerge(UnmergeDefs, - MIRBuilder.buildMerge(LCMTy, RemergeRegs)); + unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); + SmallVector<Register, 8> UnmergeDefs(NumDefs); + UnmergeDefs[0] = DstReg; + for (unsigned I = 1; I != NumDefs; ++I) + UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); + + MIRBuilder.buildUnmerge(UnmergeDefs, + MIRBuilder.buildMerge(LCMTy, RemergeRegs)); return; } @@ -399,20 +399,20 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { -#define RTLIBCASE_INT(LibcallPrefix) \ - do { \ - switch (Size) { \ - case 32: \ - return RTLIB::LibcallPrefix##32; \ - case 64: \ - return RTLIB::LibcallPrefix##64; \ - case 128: \ - return RTLIB::LibcallPrefix##128; \ - default: \ - llvm_unreachable("unexpected size"); \ - } \ - } while (0) - +#define RTLIBCASE_INT(LibcallPrefix) \ + do { \ + switch (Size) { \ + case 32: \ + return RTLIB::LibcallPrefix##32; \ + case 64: \ + return RTLIB::LibcallPrefix##64; \ + case 128: \ + return RTLIB::LibcallPrefix##128; \ + default: \ + llvm_unreachable("unexpected size"); \ + } \ + } while (0) + #define RTLIBCASE(LibcallPrefix) \ do { \ switch (Size) { \ @@ -420,8 +420,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { return RTLIB::LibcallPrefix##32; \ case 64: \ return RTLIB::LibcallPrefix##64; \ - case 80: \ - return RTLIB::LibcallPrefix##80; \ + case 80: \ + return RTLIB::LibcallPrefix##80; \ case 128: \ return RTLIB::LibcallPrefix##128; \ default: \ @@ -431,15 +431,15 @@ static RTLIB::Libcall 
getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - RTLIBCASE_INT(SDIV_I); + RTLIBCASE_INT(SDIV_I); case TargetOpcode::G_UDIV: - RTLIBCASE_INT(UDIV_I); + RTLIBCASE_INT(UDIV_I); case TargetOpcode::G_SREM: - RTLIBCASE_INT(SREM_I); + RTLIBCASE_INT(SREM_I); case TargetOpcode::G_UREM: - RTLIBCASE_INT(UREM_I); + RTLIBCASE_INT(UREM_I); case TargetOpcode::G_CTLZ_ZERO_UNDEF: - RTLIBCASE_INT(CTLZ_I); + RTLIBCASE_INT(CTLZ_I); case TargetOpcode::G_FADD: RTLIBCASE(ADD_F); case TargetOpcode::G_FSUB: @@ -482,16 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(RINT_F); case TargetOpcode::G_FNEARBYINT: RTLIBCASE(NEARBYINT_F); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - RTLIBCASE(ROUNDEVEN_F); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + RTLIBCASE(ROUNDEVEN_F); } llvm_unreachable("Unknown libcall function"); } /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(const TargetInstrInfo &TII, - MachineInstr &MI) { +static bool isLibCallInTailPosition(const TargetInstrInfo &TII, + MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -566,7 +566,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, SmallVector<CallLowering::ArgInfo, 3> Args; // Add all the args, except for the last which is an imm denoting 'tail'. - for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { + for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { Register Reg = MI.getOperand(i).getReg(); // Need derive an IR type for call lowering. @@ -582,14 +582,14 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); RTLIB::Libcall RTLibcall; - switch (MI.getOpcode()) { - case TargetOpcode::G_MEMCPY: + switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; break; - case TargetOpcode::G_MEMMOVE: - RTLibcall = RTLIB::MEMMOVE; - break; - case TargetOpcode::G_MEMSET: + case TargetOpcode::G_MEMMOVE: + RTLibcall = RTLIB::MEMMOVE; + break; + case TargetOpcode::G_MEMSET: RTLibcall = RTLIB::MEMSET; break; default: @@ -601,8 +601,8 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MIRBuilder.getTII(), MI); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(MIRBuilder.getTII(), MI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -695,11 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FRINT: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); - if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { - LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && 
Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); @@ -748,14 +748,14 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); - MI.eraseFromParent(); - return Result; - } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); + MI.eraseFromParent(); + return Result; } + } MI.eraseFromParent(); return Legalized; @@ -935,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - auto &MMO = **MI.memoperands_begin(); + auto &MMO = **MI.memoperands_begin(); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -959,15 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); - unsigned MemSize = MMO.getSizeInBits(); - - if (MemSize == NarrowSize) { + unsigned MemSize = MMO.getSizeInBits(); + + if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - } else if (MemSize < NarrowSize) { + } else if (MemSize < NarrowSize) { MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); - } else if (MemSize > NarrowSize) { - // FIXME: Need to split the load. - return UnableToLegalize; + } else if (MemSize > NarrowSize) { + // FIXME: Need to split the load. + return UnableToLegalize; } if (ZExt) @@ -1063,11 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. 
+ if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs(NumParts); SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); @@ -1248,7 +1248,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRMASK: { if (TypeIdx != 1) return UnableToLegalize; @@ -1257,17 +1257,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: - case TargetOpcode::G_FPTOSI: - return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); - case TargetOpcode::G_FPEXT: - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FPEXT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; + } } Register LegalizerHelper::coerceToScalar(Register Val) { @@ -1328,7 +1328,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, @@ -1496,40 +1496,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } -Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { - Register WideReg = MRI.createGenericVirtualRegister(WideTy); - LLT OrigTy = MRI.getType(OrigReg); - LLT LCMTy = getLCMType(WideTy, OrigTy); - - const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); - const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); - - Register UnmergeSrc = WideReg; - - // Create a merge to the LCM type, padding with undef - // %0:_(<3 x s32>) = G_FOO => <4 x s32> - // => - // %1:_(<4 x s32>) = G_FOO - // %2:_(<4 x s32>) = G_IMPLICIT_DEF - // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 - // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 - if (NumMergeParts > 1) { - Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); - SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); - MergeParts[0] = WideReg; - UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); - } - - // Unmerge to the original register and pad with dead defs. 
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); - UnmergeResults[0] = OrigReg; - for (int I = 1; I != NumUnmergeParts; ++I) - UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); - - MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); - return WideReg; -} - +Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { + Register WideReg = MRI.createGenericVirtualRegister(WideTy); + LLT OrigTy = MRI.getType(OrigReg); + LLT LCMTy = getLCMType(WideTy, OrigTy); + + const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); + const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); + + Register UnmergeSrc = WideReg; + + // Create a merge to the LCM type, padding with undef + // %0:_(<3 x s32>) = G_FOO => <4 x s32> + // => + // %1:_(<4 x s32>) = G_FOO + // %2:_(<4 x s32>) = G_IMPLICIT_DEF + // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 + // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 + if (NumMergeParts > 1) { + Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); + SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); + MergeParts[0] = WideReg; + UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); + } + + // Unmerge to the original register and pad with dead defs. + SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); + UnmergeResults[0] = OrigReg; + for (int I = 1; I != NumUnmergeParts; ++I) + UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); + + MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); + return WideReg; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1599,60 +1599,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); - // Create a sequence of unmerges and merges to the original results. Since we - // may have widened the source, we will need to pad the results with dead defs - // to cover the source register. - // e.g. widen s48 to s64: - // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) + // Create a sequence of unmerges and merges to the original results. Since we + // may have widened the source, we will need to pad the results with dead defs + // to cover the source register. + // e.g. 
widen s48 to s64: + // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) // // => - // %4:_(s192) = G_ANYEXT %0:_(s96) - // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge - // ; unpack to GCD type, with extra dead defs - // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) - // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) - // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) - // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination - // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination - const LLT GCDTy = getGCDType(WideTy, DstTy); + // %4:_(s192) = G_ANYEXT %0:_(s96) + // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge + // ; unpack to GCD type, with extra dead defs + // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) + // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) + // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) + // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination + // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination + const LLT GCDTy = getGCDType(WideTy, DstTy); const int NumUnmerge = Unmerge->getNumOperands() - 1; - const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); - - // Directly unmerge to the destination without going through a GCD type - // if possible - if (PartsPerRemerge == 1) { - const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); - - for (int I = 0; I != NumUnmerge; ++I) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - - for (int J = 0; J != PartsPerUnmerge; ++J) { - int Idx = I * PartsPerUnmerge + J; - if (Idx < NumDst) - MIB.addDef(MI.getOperand(Idx).getReg()); - else { - // Create dead def for excess components. - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); - } + const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); + + // Directly unmerge to the destination without going through a GCD type + // if possible + if (PartsPerRemerge == 1) { + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. 
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } } - - MIB.addUse(Unmerge.getReg(I)); - } - } else { - SmallVector<Register, 16> Parts; - for (int J = 0; J != NumUnmerge; ++J) - extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); - - SmallVector<Register, 8> RemergeParts; - for (int I = 0; I != NumDst; ++I) { - for (int J = 0; J < PartsPerRemerge; ++J) { - const int Idx = I * PartsPerRemerge + J; - RemergeParts.emplace_back(Parts[Idx]); - } - - MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); - RemergeParts.clear(); + + MIB.addUse(Unmerge.getReg(I)); } + } else { + SmallVector<Register, 16> Parts; + for (int J = 0; J != NumUnmerge; ++J) + extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); + + SmallVector<Register, 8> RemergeParts; + for (int I = 0; I != NumDst; ++I) { + for (int J = 0; J < PartsPerRemerge; ++J) { + const int Idx = I * PartsPerRemerge + J; + RemergeParts.emplace_back(Parts[Idx]); + } + + MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + RemergeParts.clear(); + } } MI.eraseFromParent(); @@ -1702,7 +1702,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { Src = MIRBuilder.buildAnyExt(WideTy, Src); ShiftTy = WideTy; - } + } auto LShr = MIRBuilder.buildLShr( ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); @@ -1740,7 +1740,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx != 0 || WideTy.isVector()) + if (TypeIdx != 0 || WideTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); @@ -1750,45 +1750,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - unsigned Op = MI.getOpcode(); - unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO - ? TargetOpcode::G_ADD - : TargetOpcode::G_SUB; - unsigned ExtOpcode = - Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO - ? TargetOpcode::G_ZEXT - : TargetOpcode::G_SEXT; - auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); - auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); - // Do the arithmetic in the larger type. - auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); - LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); - auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); - auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); - // There is no overflow if the ExtOp is the same as NewOp. - MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); - // Now trunc the NewOp to the original result. - MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 1) + return UnableToLegalize; // TODO + unsigned Op = MI.getOpcode(); + unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO + ? 
TargetOpcode::G_ADD + : TargetOpcode::G_SUB; + unsigned ExtOpcode = + Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO + ? TargetOpcode::G_ZEXT + : TargetOpcode::G_SEXT; + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); + auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); + // Do the arithmetic in the larger type. + auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); + LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); + auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); + auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); + // There is no overflow if the ExtOp is the same as NewOp. + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); + // Now trunc the NewOp to the original result. + MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || - MI.getOpcode() == TargetOpcode::G_SSUBSAT || - MI.getOpcode() == TargetOpcode::G_SSHLSAT; - bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT; + MI.getOpcode() == TargetOpcode::G_SSUBSAT || + MI.getOpcode() == TargetOpcode::G_SSHLSAT; + bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT; // We can convert this to: // 1. Any extend iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB|SHL]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // // It may be more efficient to lower this to a min and a max operation in @@ -1799,14 +1799,14 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, unsigned NewBits = WideTy.getScalarSizeInBits(); unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); - // Shifts must zero-extend the RHS to preserve the unsigned quantity, and - // must not left shift the RHS to preserve the shift amount. + // Shifts must zero-extend the RHS to preserve the unsigned quantity, and + // must not left shift the RHS to preserve the shift amount. auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); - auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) - : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) + : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); - auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); + auto ShiftR = IsShift ? 
RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {ShiftL, ShiftR}, MI.getFlags()); @@ -1834,18 +1834,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return widenScalarMergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UNMERGE_VALUES: return widenScalarUnmergeValues(MI, TypeIdx, WideTy); - case TargetOpcode::G_SADDO: - case TargetOpcode::G_SSUBO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: case TargetOpcode::G_UADDO: - case TargetOpcode::G_USUBO: - return widenScalarAddoSubo(MI, TypeIdx, WideTy); + case TargetOpcode::G_USUBO: + return widenScalarAddoSubo(MI, TypeIdx, WideTy); case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: - case TargetOpcode::G_SSHLSAT: + case TargetOpcode::G_SSHLSAT: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: - case TargetOpcode::G_USHLSAT: - return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); + case TargetOpcode::G_USHLSAT: + return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -2038,22 +2038,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_SITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_UITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_LOAD: @@ -2069,7 +2069,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!Ty.isScalar()) + if (!Ty.isScalar()) return UnableToLegalize; Observer.changingInstr(MI); @@ -2267,7 +2267,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -2277,15 +2277,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_FPOWI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPOWI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; @@ -2312,7 +2312,7 @@ 
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Avoid changing the result vector type if the source element type was // requested. if (TypeIdx == 1) { - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); } else { widenScalarDst(MI, WideTy, 0); } @@ -2415,377 +2415,377 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { return UnableToLegalize; } -/// Figure out the bit offset into a register when coercing a vector index for -/// the wide element type. This is only for the case when promoting vector to -/// one with larger elements. -// -/// -/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) -/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) -static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, - Register Idx, - unsigned NewEltSize, - unsigned OldEltSize) { - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - LLT IdxTy = B.getMRI()->getType(Idx); - - // Now figure out the amount we need to shift to get the target bits. - auto OffsetMask = B.buildConstant( - IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); - auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); - return B.buildShl(IdxTy, OffsetIdx, - B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); -} - -/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this -/// is casting to a vector with a smaller element size, perform multiple element -/// extracts and merge the results. If this is coercing to a vector with larger -/// elements, index the bitcasted vector and extract the target element with bit -/// operations. This is intended to force the indexing in the native register -/// size for architectures that can dynamically index the register file. -LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 1) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Idx = MI.getOperand(2).getReg(); - LLT SrcVecTy = MRI.getType(SrcVec); - LLT IdxTy = MRI.getType(Idx); - - LLT SrcEltTy = SrcVecTy.getElementType(); - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = SrcVecTy.getNumElements(); - - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = SrcEltTy.getSizeInBits(); - if (NewNumElts > OldNumElts) { - // Decreasing the vector element size - // - // e.g. i64 = extract_vector_elt x:v2i64, y:i32 - // => - // v4i32:castx = bitcast x:v2i64 - // - // i64 = bitcast - // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), - // (i32 (extract_vector_elt castx, (2 * y + 1))) - // - if (NewNumElts % OldNumElts != 0) - return UnableToLegalize; - - // Type of the intermediate result vector. 
- const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; - LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); - - auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); - - SmallVector<Register, 8> NewOps(NewEltsPerOldElt); - auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); - - for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { - auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); - auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); - auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); - NewOps[I] = Elt.getReg(0); - } - - auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); - MIRBuilder.buildBitcast(Dst, NewVec); - MI.eraseFromParent(); - return Legalized; - } - - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - // Increasing the vector element size. - // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx - // - // => - // - // %cast = G_BITCAST %vec - // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) - // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx - // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) - // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) - // %elt_bits = G_LSHR %wide_elt, %offset_bits - // %elt = G_TRUNC %elt_bits - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register WideElt = CastVec; - if (CastTy.isVector()) { - WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. - Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - // Shift the wide element to get the target element. - auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); - MIRBuilder.buildTrunc(Dst, ExtractedBits); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p -/// TargetReg, while preserving other bits in \p TargetReg. 
-/// -/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) -static Register buildBitFieldInsert(MachineIRBuilder &B, - Register TargetReg, Register InsertReg, - Register OffsetBits) { - LLT TargetTy = B.getMRI()->getType(TargetReg); - LLT InsertTy = B.getMRI()->getType(InsertReg); - auto ZextVal = B.buildZExt(TargetTy, InsertReg); - auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); - - // Produce a bitmask of the value to insert - auto EltMask = B.buildConstant( - TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), - InsertTy.getSizeInBits())); - // Shift it into position - auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); - auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); - - // Clear out the bits in the wide element - auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); - - // The value to insert has all zeros already, so stick it into the masked - // wide element. - return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); -} - -/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this -/// is increasing the element size, perform the indexing in the target element -/// type, and use bit operations to insert at the element position. This is -/// intended for architectures that can dynamically index the register file and -/// want to force indexing in the native register size. -LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Val = MI.getOperand(2).getReg(); - Register Idx = MI.getOperand(3).getReg(); - - LLT VecTy = MRI.getType(Dst); - LLT IdxTy = MRI.getType(Idx); - - LLT VecEltTy = VecTy.getElementType(); - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = VecEltTy.getSizeInBits(); - - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = VecTy.getNumElements(); - - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register ExtractedElt = CastVec; - if (CastTy.isVector()) { - ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. 
- Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, - Val, OffsetBits); - if (CastTy.isVector()) { - InsertedElt = MIRBuilder.buildInsertVectorElement( - CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); - } - - MIRBuilder.buildBitcast(Dst, InsertedElt); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerLoad(MachineInstr &MI) { - // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); - - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); - return Legalized; - } - - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case 
TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; - } - - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStore(MachineInstr &MI) { - // Lower a non-power of 2 store into multiple pow-2 stores. - // E.g. split an i24 store into an i16 store + i8 store. - // We do this by first extending the stored value to the next largest power - // of 2 type, and then using truncating stores to store the components. - // By doing this, likewise with G_LOAD, generate an extend that can be - // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; - if (SrcTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Extend to the next pow-2. - const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - - // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - - // Generate the PtrAdd and truncating stores. - LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = - MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); - return Legalized; -} - +/// Figure out the bit offset into a register when coercing a vector index for +/// the wide element type. This is only for the case when promoting vector to +/// one with larger elements. +// +/// +/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) +/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) +static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, + Register Idx, + unsigned NewEltSize, + unsigned OldEltSize) { + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + LLT IdxTy = B.getMRI()->getType(Idx); + + // Now figure out the amount we need to shift to get the target bits. + auto OffsetMask = B.buildConstant( + IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); + return B.buildShl(IdxTy, OffsetIdx, + B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); +} + +/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this +/// is casting to a vector with a smaller element size, perform multiple element +/// extracts and merge the results. 
If this is coercing to a vector with larger +/// elements, index the bitcasted vector and extract the target element with bit +/// operations. This is intended to force the indexing in the native register +/// size for architectures that can dynamically index the register file. LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT SrcVecTy = MRI.getType(SrcVec); + LLT IdxTy = MRI.getType(Idx); + + LLT SrcEltTy = SrcVecTy.getElementType(); + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = SrcVecTy.getNumElements(); + + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = SrcEltTy.getSizeInBits(); + if (NewNumElts > OldNumElts) { + // Decreasing the vector element size + // + // e.g. i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + if (NewNumElts % OldNumElts != 0) + return UnableToLegalize; + + // Type of the intermediate result vector. + const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; + LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + + auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); + + SmallVector<Register, 8> NewOps(NewEltsPerOldElt); + auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); + + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); + auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); + auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); + NewOps[I] = Elt.getReg(0); + } + + auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); + MIRBuilder.buildBitcast(Dst, NewVec); + MI.eraseFromParent(); + return Legalized; + } + + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + // Increasing the vector element size. + // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx + // + // => + // + // %cast = G_BITCAST %vec + // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) + // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx + // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) + // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) + // %elt_bits = G_LSHR %wide_elt, %offset_bits + // %elt = G_TRUNC %elt_bits + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. 
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register WideElt = CastVec; + if (CastTy.isVector()) { + WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + // Shift the wide element to get the target element. + auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); + MIRBuilder.buildTrunc(Dst, ExtractedBits); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p +/// TargetReg, while preserving other bits in \p TargetReg. +/// +/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) +static Register buildBitFieldInsert(MachineIRBuilder &B, + Register TargetReg, Register InsertReg, + Register OffsetBits) { + LLT TargetTy = B.getMRI()->getType(TargetReg); + LLT InsertTy = B.getMRI()->getType(InsertReg); + auto ZextVal = B.buildZExt(TargetTy, InsertReg); + auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); + + // Produce a bitmask of the value to insert + auto EltMask = B.buildConstant( + TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), + InsertTy.getSizeInBits())); + // Shift it into position + auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); + auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); + + // Clear out the bits in the wide element + auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); + + // The value to insert has all zeros already, so stick it into the masked + // wide element. + return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); +} + +/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this +/// is increasing the element size, perform the indexing in the target element +/// type, and use bit operations to insert at the element position. This is +/// intended for architectures that can dynamically index the register file and +/// want to force indexing in the native register size. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Val = MI.getOperand(2).getReg(); + Register Idx = MI.getOperand(3).getReg(); + + LLT VecTy = MRI.getType(Dst); + LLT IdxTy = MRI.getType(Idx); + + LLT VecEltTy = VecTy.getElementType(); + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = VecEltTy.getSizeInBits(); + + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = VecTy.getNumElements(); + + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. 
+ if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. + auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register ExtractedElt = CastVec; + if (CastTy.isVector()) { + ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, + Val, OffsetBits); + if (CastTy.isVector()) { + InsertedElt = MIRBuilder.buildInsertVectorElement( + CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); + } + + MIRBuilder.buildBitcast(Dst, InsertedElt); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerLoad(MachineInstr &MI) { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. 
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStore(MachineInstr &MI) { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the PtrAdd and truncating stores. 
+ LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { @@ -2833,24 +2833,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: - return bitcastExtractVectorElt(MI, TypeIdx, CastTy); - case TargetOpcode::G_INSERT_VECTOR_ELT: - return bitcastInsertVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return bitcastExtractVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_INSERT_VECTOR_ELT: + return bitcastInsertVectorElt(MI, TypeIdx, CastTy); default: return UnableToLegalize; } } -// Legalize an instruction by changing the opcode in place. -void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { - Observer.changingInstr(MI); - MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); - Observer.changedInstr(MI); -} - +// Legalize an instruction by changing the opcode in place. +void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { + Observer.changingInstr(MI); + MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); + Observer.changedInstr(MI); +} + LegalizerHelper::LegalizeResult -LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { using namespace TargetOpcode; switch(MI.getOpcode()) { @@ -2860,7 +2860,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerBitcast(MI); case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); auto Quot = MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty}, {MI.getOperand(1), MI.getOperand(2)}); @@ -2873,9 +2873,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_SADDO: case TargetOpcode::G_SSUBO: return lowerSADDO_SSUBO(MI); - case TargetOpcode::G_UMULH: - case TargetOpcode::G_SMULH: - return lowerSMULH_UMULH(MI); + case TargetOpcode::G_UMULH: + case TargetOpcode::G_SMULH: + return lowerSMULH_UMULH(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -2884,7 +2884,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { Register Overflow = MI.getOperand(1).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); - LLT Ty = MRI.getType(Res); + LLT Ty = MRI.getType(Res); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO ? 
TargetOpcode::G_SMULH @@ -2914,24 +2914,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case TargetOpcode::G_FNEG: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // TODO: Handle vector types once we are able to // represent them. if (Ty.isVector()) return UnableToLegalize; - auto SignMask = - MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); + auto SignMask = + MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); Register SubByReg = MI.getOperand(1).getReg(); - MIRBuilder.buildXor(Res, SubByReg, SignMask); + MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FSUB: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). // First, check if G_FNEG is marked as Lower. If so, we may // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. @@ -2951,12 +2951,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { - // Since round even is the assumed rounding mode for unconstrained FP - // operations, rint and roundeven are the same operation. - changeOpcode(MI, TargetOpcode::G_FRINT); - return Legalized; - } + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + // Since round even is the assumed rounding mode for unconstrained FP + // operations, rint and roundeven are the same operation. 
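A quick editor's illustration of why this opcode swap is sound: under the default round-to-nearest-even mode, rint already produces roundeven results on halfway cases.

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_TONEAREST); // the assumed default rounding mode
  // Halfway cases round to the even neighbor: prints "2 4".
  std::printf("%g %g\n", std::rint(2.5), std::rint(3.5));
}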
+ changeOpcode(MI, TargetOpcode::G_FRINT); + return Legalized; + } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2971,16 +2971,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return lowerLoad(MI); - case TargetOpcode::G_STORE: - return lowerStore(MI); + case TargetOpcode::G_ZEXTLOAD: + return lowerLoad(MI); + case TargetOpcode::G_STORE: + return lowerStore(MI); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: - return lowerBitCount(MI); + return lowerBitCount(MI); case G_UADDO: { Register Res = MI.getOperand(0).getReg(); Register CarryOut = MI.getOperand(1).getReg(); @@ -3042,24 +3042,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_UITOFP: - return lowerUITOFP(MI); + return lowerUITOFP(MI); case G_SITOFP: - return lowerSITOFP(MI); + return lowerSITOFP(MI); case G_FPTOUI: - return lowerFPTOUI(MI); + return lowerFPTOUI(MI); case G_FPTOSI: return lowerFPTOSI(MI); case G_FPTRUNC: - return lowerFPTRUNC(MI); - case G_FPOWI: - return lowerFPOWI(MI); + return lowerFPTRUNC(MI); + case G_FPOWI: + return lowerFPOWI(MI); case G_SMIN: case G_SMAX: case G_UMIN: case G_UMAX: - return lowerMinMax(MI); + return lowerMinMax(MI); case G_FCOPYSIGN: - return lowerFCopySign(MI); + return lowerFCopySign(MI); case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); @@ -3082,9 +3082,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.eraseFromParent(); return Legalized; } - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return lowerExtractInsertVectorElt(MI); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return lowerExtractInsertVectorElt(MI); case G_SHUFFLE_VECTOR: return lowerShuffleVector(MI); case G_DYN_STACKALLOC: @@ -3100,123 +3100,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_READ_REGISTER: case G_WRITE_REGISTER: return lowerReadWriteRegister(MI); - case G_UADDSAT: - case G_USUBSAT: { - // Try to make a reasonable guess about which lowering strategy to use. The - // target can override this with custom lowering and calling the - // implementation functions. - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SADDSAT: - case G_SSUBSAT: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - - // FIXME: It would probably make more sense to see if G_SADDO is preferred, - // since it's a shorter expansion. However, we would need to figure out the - // preferred boolean type for the carry out for the query. 
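For reference, an editor's sketch of the min/max expansion lowerAddSubSatToMinMax produces for the signed add case, using the same clamping identity spelled out in that function's comments further down:

#include <algorithm>
#include <cstdint>

// Clamp b into the range that cannot overflow a + b, then add.
int32_t sadd_sat(int32_t a, int32_t b) {
  int32_t hi = INT32_MAX - std::max(a, 0); // largest safe addend
  int32_t lo = INT32_MIN - std::min(a, 0); // smallest safe addend
  return a + std::min(std::max(lo, b), hi);
}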
- if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SSHLSAT: - case G_USHLSAT: - return lowerShlSat(MI); - case G_ABS: { - // Expand %res = G_ABS %a into: - // %v1 = G_ASHR %a, scalar_size-1 - // %v2 = G_ADD %a, %v1 - // %res = G_XOR %v2, %v1 - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - Register OpReg = MI.getOperand(1).getReg(); - auto ShiftAmt = - MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); - auto Shift = - MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); - auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); - MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); - MI.eraseFromParent(); - return Legalized; - } - case G_SELECT: - return lowerSelect(MI); - } -} - -Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, - Align MinAlign) const { - // FIXME: We're missing a way to go back from LLT to llvm::Type to query the - // datalayout for the preferred alignment. Also there should be a target hook - // for this to allow targets to reduce the alignment and ignore the - // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of - // the type. - return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); -} - -MachineInstrBuilder -LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, - MachinePointerInfo &PtrInfo) { - MachineFunction &MF = MIRBuilder.getMF(); - const DataLayout &DL = MIRBuilder.getDataLayout(); - int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); - - unsigned AddrSpace = DL.getAllocaAddrSpace(); - LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); - - PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); - return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); -} - -static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, - LLT VecTy) { - int64_t IdxVal; - if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) - return IdxReg; - - LLT IdxTy = B.getMRI()->getType(IdxReg); - unsigned NElts = VecTy.getNumElements(); - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); - return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); - } - - return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) - .getReg(0); -} - -Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, - Register Index) { - LLT EltTy = VecTy.getElementType(); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltTy.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); - - LLT IdxTy = MRI.getType(Index); - auto Mul = MIRBuilder.buildMul(IdxTy, Index, - MIRBuilder.buildConstant(IdxTy, EltSize)); - - LLT PtrTy = MRI.getType(VecPtr); - return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); + case G_UADDSAT: + case G_USUBSAT: { + // Try to make a reasonable guess about which lowering strategy to use. The + // target can override this with custom lowering and calling the + // implementation functions. 
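An editor's sketch of the G_UMIN-based strategy chosen here, using the identities noted in lowerAddSubSatToMinMax below; ~a is exactly the headroom left before a wraps:

#include <algorithm>
#include <cstdint>

uint32_t uadd_sat(uint32_t a, uint32_t b) {
  return a + std::min(~a, b); // uadd.sat(a, b) = a + umin(~a, b)
}
uint32_t usub_sat(uint32_t a, uint32_t b) {
  return a - std::min(a, b);  // usub.sat(a, b) = a - umin(a, b)
}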
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (LI.isLegalOrCustom({G_UMIN, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SADDSAT: + case G_SSUBSAT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + // FIXME: It would probably make more sense to see if G_SADDO is preferred, + // since it's a shorter expansion. However, we would need to figure out the + // preferred boolean type for the carry out for the query. + if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SSHLSAT: + case G_USHLSAT: + return lowerShlSat(MI); + case G_ABS: { + // Expand %res = G_ABS %a into: + // %v1 = G_ASHR %a, scalar_size-1 + // %v2 = G_ADD %a, %v1 + // %res = G_XOR %v2, %v1 + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register OpReg = MI.getOperand(1).getReg(); + auto ShiftAmt = + MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); + auto Shift = + MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); + auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); + MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); + MI.eraseFromParent(); + return Legalized; + } + case G_SELECT: + return lowerSelect(MI); + } } +Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, + Align MinAlign) const { + // FIXME: We're missing a way to go back from LLT to llvm::Type to query the + // datalayout for the preferred alignment. Also there should be a target hook + // for this to allow targets to reduce the alignment and ignore the + // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of + // the type. + return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); +} + +MachineInstrBuilder +LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, + MachinePointerInfo &PtrInfo) { + MachineFunction &MF = MIRBuilder.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); + + unsigned AddrSpace = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + + PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); + return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); +} + +static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, + LLT VecTy) { + int64_t IdxVal; + if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) + return IdxReg; + + LLT IdxTy = B.getMRI()->getType(IdxReg); + unsigned NElts = VecTy.getNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); + return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); + } + + return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) + .getReg(0); +} + +Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, + Register Index) { + LLT EltTy = VecTy.getElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. 
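An editor's sketch of the complete address computation these two helpers build: a power-of-two element count lets the clamp be a cheap AND mask, otherwise an unsigned min against NElts - 1 is used, and the element address is base plus index times element size.

#include <algorithm>
#include <cstdint>

uint64_t elementAddress(uint64_t Base, uint64_t Idx, uint64_t NElts,
                        uint64_t EltBytes) {
  bool Pow2 = (NElts & (NElts - 1)) == 0;
  uint64_t Clamped = Pow2 ? (Idx & (NElts - 1))       // G_AND with mask
                          : std::min(Idx, NElts - 1); // G_UMIN clamp
  return Base + Clamped * EltBytes;                   // G_MUL + G_PTR_ADD
}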
+ assert(EltSize * 8 == EltTy.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); + + LLT IdxTy = MRI.getType(Index); + auto Mul = MIRBuilder.buildMul(IdxTy, Index, + MIRBuilder.buildConstant(IdxTy, EltSize)); + + LLT PtrTy = MRI.getType(VecPtr); + return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); +} + LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); + LLT DstTy = MRI.getType(DstReg); + LLT LCMTy = getLCMType(DstTy, NarrowTy); - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); + auto NewUndef = MIRBuilder.buildUndef(NarrowTy); + SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); MI.eraseFromParent(); return Legalized; } @@ -3337,7 +3337,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); + NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); @@ -3610,116 +3610,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. -// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 +// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces +// a vector +// +// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with +// undef as necessary. +// +// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 +// -> <2 x s16> +// +// %4:_(s16) = G_IMPLICIT_DEF +// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 +// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 +// %7:_(<2 x s16>) = G_IMPLICIT_DEF +// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 +// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { +LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - - // Break into a common type - SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); - - // Build the requested new merge, padding with undef. 
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); - - // Pack into the original result register. - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, - unsigned TypeIdx, - LLT NarrowVecTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; - - assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); - if (IsInsert) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - // TODO: Handle total scalarization case. - if (!NarrowVecTy.isVector()) - return UnableToLegalize; - - LLT VecTy = MRI.getType(SrcVec); - - // If the index is a constant, we can really break this down as you would - // expect, and index into the target size pieces. - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - // Avoid out of bounds indexing the pieces. - if (IdxVal >= VecTy.getNumElements()) { - MIRBuilder.buildUndef(DstReg); - MI.eraseFromParent(); - return Legalized; + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + + // Break into a common type + SmallVector<Register, 16> Parts; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + + // Build the requested new merge, padding with undef. + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, + TargetOpcode::G_ANYEXT); + + // Pack into the original result register. + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowVecTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; + + assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); + if (IsInsert) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + // TODO: Handle total scalarization case. + if (!NarrowVecTy.isVector()) + return UnableToLegalize; + + LLT VecTy = MRI.getType(SrcVec); + + // If the index is a constant, we can really break this down as you would + // expect, and index into the target size pieces. + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + // Avoid out of bounds indexing the pieces. + if (IdxVal >= VecTy.getNumElements()) { + MIRBuilder.buildUndef(DstReg); + MI.eraseFromParent(); + return Legalized; } - SmallVector<Register, 8> VecParts; - LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); - - // Build a sequence of NarrowTy pieces in VecParts for this operand. 
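The index arithmetic just below maps a constant element index onto the split pieces; an editor's sketch:

#include <cstdint>

// With the vector split into pieces of NewNumElts elements each, element
// IdxVal lives in piece IdxVal / NewNumElts at local offset
// IdxVal - NewNumElts * PartIdx (i.e. IdxVal % NewNumElts).
struct SplitIndex { int64_t Part, Local; };
SplitIndex splitIndex(int64_t IdxVal, int64_t NewNumElts) {
  int64_t Part = IdxVal / NewNumElts;
  return {Part, IdxVal - NewNumElts * Part};
}
// e.g. element 5 of a <6 x s16> split into <2 x s16> pieces:
// piece 2, local index 1.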
- LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, - TargetOpcode::G_ANYEXT); - - unsigned NewNumElts = NarrowVecTy.getNumElements(); - - LLT IdxTy = MRI.getType(Idx); - int64_t PartIdx = IdxVal / NewNumElts; - auto NewIdx = - MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); - - if (IsInsert) { - LLT PartTy = MRI.getType(VecParts[PartIdx]); - - // Use the adjusted index to insert into one of the subvectors. - auto InsertPart = MIRBuilder.buildInsertVectorElement( - PartTy, VecParts[PartIdx], InsertVal, NewIdx); - VecParts[PartIdx] = InsertPart.getReg(0); - - // Recombine the inserted subvector with the others to reform the result - // vector. - buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); - } else { - MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); - } - - MI.eraseFromParent(); - return Legalized; - } - - // With a variable index, we can't perform the operation in a smaller type, so - // we're forced to expand this. - // - // TODO: We could emit a chain of compare/select to figure out which piece to - // index. - return lowerExtractInsertVectorElt(MI); + SmallVector<Register, 8> VecParts; + LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); + + // Build a sequence of NarrowTy pieces in VecParts for this operand. + LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); + + unsigned NewNumElts = NarrowVecTy.getNumElements(); + + LLT IdxTy = MRI.getType(Idx); + int64_t PartIdx = IdxVal / NewNumElts; + auto NewIdx = + MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); + + if (IsInsert) { + LLT PartTy = MRI.getType(VecParts[PartIdx]); + + // Use the adjusted index to insert into one of the subvectors. + auto InsertPart = MIRBuilder.buildInsertVectorElement( + PartTy, VecParts[PartIdx], InsertVal, NewIdx); + VecParts[PartIdx] = InsertPart.getReg(0); + + // Recombine the inserted subvector with the others to reform the result + // vector. + buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); + } else { + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + } + + MI.eraseFromParent(); + return Legalized; + } + + // With a variable index, we can't perform the operation in a smaller type, so + // we're forced to expand this. + // + // TODO: We could emit a chain of compare/select to figure out which piece to + // index. 
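The TODO above suggests a compare/select chain; a hedged editor's sketch of that idea in comment-style pseudo-MIR (not what the code currently emits), for a vector split into two pieces of NewNumElts elements:

// %local   = G_SUB %idx, NewNumElts              ; index into second piece
// %e0      = G_EXTRACT_VECTOR_ELT %piece0, %idx  ; (indices clamped in-bounds)
// %e1      = G_EXTRACT_VECTOR_ELT %piece1, %local
// %inFirst = G_ICMP ult, %idx, NewNumElts
// %dst     = G_SELECT %inFirst, %e0, %e1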
+ return lowerExtractInsertVectorElt(MI); } LegalizerHelper::LegalizeResult @@ -3765,8 +3765,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, if (NumParts == -1) return UnableToLegalize; - LLT PtrTy = MRI.getType(AddrReg); - const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT PtrTy = MRI.getType(AddrReg); + const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); @@ -3964,7 +3964,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_ADD: case G_SUB: case G_MUL: - case G_PTR_ADD: + case G_PTR_ADD: case G_SMULH: case G_UMULH: case G_FADD: @@ -3988,7 +3988,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FFLOOR: case G_FRINT: case G_INTRINSIC_ROUND: - case G_INTRINSIC_ROUNDEVEN: + case G_INTRINSIC_ROUNDEVEN: case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: @@ -4020,8 +4020,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SHL: case G_LSHR: case G_ASHR: - case G_SSHLSAT: - case G_USHLSAT: + case G_SSHLSAT: + case G_USHLSAT: case G_CTLZ: case G_CTLZ_ZERO_UNDEF: case G_CTTZ: @@ -4052,15 +4052,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: - assert(TypeIdx == 0 && "not a vector type index"); - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_CONCAT_VECTORS: - if (TypeIdx != 1) // TODO: This probably does work as expected already. - return UnableToLegalize; - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); + assert(TypeIdx == 0 && "not a vector type index"); + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_CONCAT_VECTORS: + if (TypeIdx != 1) // TODO: This probably does work as expected already. + return UnableToLegalize; + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -4484,31 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { } LegalizerHelper::LegalizeResult -LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; - - Register Src = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(Src); - - // If all finite floats fit into the narrowed integer type, we can just swap - // out the result type. This is practically only useful for conversions from - // half to at least 16-bits, so just handle the one case. - if (SrcTy.getScalarType() != LLT::scalar(16) || - NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) - return UnableToLegalize; - - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, - IsSigned ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { if (TypeIdx != 1) @@ -4857,9 +4857,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBitCount(MachineInstr &MI) { +LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); - const auto &TII = MIRBuilder.getTII(); + const auto &TII = MIRBuilder.getTII(); auto isSupported = [this](const LegalityQuery &Q) { auto QAction = LI.getAction(Q).Action; return QAction == Legal || QAction == Libcall || QAction == Custom; @@ -4947,15 +4947,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); - auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); + auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); + auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); auto MIBTmp = MIRBuilder.buildAnd( - SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); - if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && - isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { - auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); + SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); + if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && + isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { + auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen, - MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); + MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); MI.eraseFromParent(); return Legalized; } @@ -4964,8 +4964,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { return Legalized; } case TargetOpcode::G_CTPOP: { - Register SrcReg = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(SrcReg); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; @@ -4975,11 +4975,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // B2Count = val - { (val >> 1) & 0x55555555 } // since it gives same result in blocks of 2 with one instruction less. 
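Written out in full for 32 bits, the bit-parallel count that this block and the ones after it assemble (an editor's sketch of the classic expansion, matching the masks used in the code):

#include <cstdint>

uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // counts in 2-bit blocks
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // counts in 4-bit blocks
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // counts in 8-bit blocks
  return (v * 0x01010101u) >> 24;                   // sum the four bytes
}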
auto C_1 = B.buildConstant(Ty, 1); - auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); + auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55)); auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0); auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0); - auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); + auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); // In order to get count in blocks of 4 add values from adjacent block of 2. // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 } @@ -5078,7 +5078,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5106,7 +5106,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5152,7 +5152,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5369,7 +5369,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { +LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -5384,20 +5384,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { return UnableToLegalize; } -// TODO: If RHS is a constant SelectionDAGBuilder expands this into a -// multiplication tree. -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Dst); - - auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); - MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); - MI.eraseFromParent(); - return Legalized; -} - +// TODO: If RHS is a constant SelectionDAGBuilder expands this into a +// multiplication tree. 
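An editor's sketch of the multiplication-tree expansion that TODO refers to, for a constant non-negative exponent (standard square-and-multiply):

double powi_tree(double x, unsigned n) {
  double r = 1.0;
  while (n) {
    if (n & 1)
      r *= x; // multiply in the current square when the bit is set
    x *= x;   // square
    n >>= 1;
  }
  return r;
}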
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); + MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -5413,7 +5413,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) { } } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5429,7 +5429,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFCopySign(MachineInstr &MI) { +LegalizerHelper::lowerFCopySign(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5651,72 +5651,72 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { return Legalized; } -/// Lower a vector extract or insert by writing the vector to a stack temporary -/// and reloading the element or vector. -/// -/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx -/// => -/// %stack_temp = G_FRAME_INDEX -/// G_STORE %vec, %stack_temp -/// %idx = clamp(%idx, %vec.getNumElements()) -/// %element_ptr = G_PTR_ADD %stack_temp, %idx -/// %dst = G_LOAD %element_ptr -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - LLT VecTy = MRI.getType(SrcVec); - LLT EltTy = VecTy.getElementType(); - if (!EltTy.isByteSized()) { // Not implemented. - LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); - return UnableToLegalize; - } - - unsigned EltBytes = EltTy.getSizeInBytes(); - Align VecAlign = getStackTemporaryAlignment(VecTy); - Align EltAlign; - - MachinePointerInfo PtrInfo; - auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), - VecAlign, PtrInfo); - MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); - - // Get the pointer to the element, and be sure not to hit undefined behavior - // if the index is out of bounds. - Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - int64_t Offset = IdxVal * EltBytes; - PtrInfo = PtrInfo.getWithOffset(Offset); - EltAlign = commonAlignment(VecAlign, Offset); - } else { - // We lose information with a variable offset. - EltAlign = getStackTemporaryAlignment(EltTy); - PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); - } - - if (InsertVal) { - // Write the inserted element - MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); - - // Reload the whole vector. 
- MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); - } else { - MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); - } - - MI.eraseFromParent(); - return Legalized; -} - +/// Lower a vector extract or insert by writing the vector to a stack temporary +/// and reloading the element or vector. +/// +/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx +/// => +/// %stack_temp = G_FRAME_INDEX +/// G_STORE %vec, %stack_temp +/// %idx = clamp(%idx, %vec.getNumElements()) +/// %element_ptr = G_PTR_ADD %stack_temp, %idx +/// %dst = G_LOAD %element_ptr LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + LLT VecTy = MRI.getType(SrcVec); + LLT EltTy = VecTy.getElementType(); + if (!EltTy.isByteSized()) { // Not implemented. + LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); + return UnableToLegalize; + } + + unsigned EltBytes = EltTy.getSizeInBytes(); + Align VecAlign = getStackTemporaryAlignment(VecTy); + Align EltAlign; + + MachinePointerInfo PtrInfo; + auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), + VecAlign, PtrInfo); + MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); + + // Get the pointer to the element, and be sure not to hit undefined behavior + // if the index is out of bounds. + Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + int64_t Offset = IdxVal * EltBytes; + PtrInfo = PtrInfo.getWithOffset(Offset); + EltAlign = commonAlignment(VecAlign, Offset); + } else { + // We lose information with a variable offset. + EltAlign = getStackTemporaryAlignment(EltTy); + PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); + } + + if (InsertVal) { + // Write the inserted element + MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); + + // Reload the whole vector. 
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); + } else { + MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); Register Src0Reg = MI.getOperand(1).getReg(); @@ -5931,185 +5931,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - bool IsSigned; - bool IsAdd; - unsigned BaseOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - } - - if (IsSigned) { - // sadd.sat(a, b) -> - // hi = 0x7fffffff - smax(a, 0) - // lo = 0x80000000 - smin(a, 0) - // a + smin(smax(lo, b), hi) - // ssub.sat(a, b) -> - // lo = smax(a, -1) - 0x7fffffff - // hi = smin(a, -1) - 0x80000000 - // a - smin(smax(lo, b), hi) - // TODO: AMDGPU can use a "median of 3" instruction here: - // a +/- med3(lo, b, hi) - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto MaxVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - MachineInstrBuilder Hi, Lo; - if (IsAdd) { - auto Zero = MIRBuilder.buildConstant(Ty, 0); - Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); - Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); - } else { - auto NegOne = MIRBuilder.buildConstant(Ty, -1); - Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), - MaxVal); - Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), - MinVal); - } - auto RHSClamped = - MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); - } else { - // uadd.sat(a, b) -> a + umin(~a, b) - // usub.sat(a, b) -> a - umin(a, b) - Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; - auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - bool IsSigned; - bool IsAdd; - unsigned OverflowOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - OverflowOp = TargetOpcode::G_UADDO; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - OverflowOp = TargetOpcode::G_SADDO; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - OverflowOp = TargetOpcode::G_USUBO; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - OverflowOp = TargetOpcode::G_SSUBO; - break; - } - - auto OverflowRes = - MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); - Register Tmp = OverflowRes.getReg(0); - Register Ov = OverflowRes.getReg(1); - MachineInstrBuilder Clamp; - if (IsSigned) { - // sadd.sat(a, b) -> - // {tmp, ov} = saddo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - // ssub.sat(a, b) -> - // {tmp, ov} = ssubo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); - auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); - } else { - // uadd.sat(a, b) -> - // {tmp, ov} = uaddo(a, b) - // ov ? 0xffffffff : tmp - // usub.sat(a, b) -> - // {tmp, ov} = usubo(a, b) - // ov ? 0 : tmp - Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); - } - MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerShlSat(MachineInstr &MI) { - assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT) && - "Expected shlsat opcode!"); - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - - unsigned BW = Ty.getScalarSizeInBits(); - auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); - auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) - : MIRBuilder.buildLShr(Ty, Result, RHS); - - MachineInstrBuilder SatVal; - if (IsSigned) { - auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); - auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); - auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, - MIRBuilder.buildConstant(Ty, 0)); - SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); - } else { - SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); - } - auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); - MIRBuilder.buildSelect(Res, Ov, SatVal, Result); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + bool IsSigned; + bool IsAdd; + unsigned BaseOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + } + + if (IsSigned) { + // sadd.sat(a, b) -> + // hi = 0x7fffffff - smax(a, 0) + // lo = 0x80000000 - smin(a, 0) + // a + smin(smax(lo, b), hi) + // ssub.sat(a, b) -> + // lo = smax(a, -1) - 0x7fffffff + // hi = smin(a, -1) - 0x80000000 + // a - smin(smax(lo, b), hi) + // TODO: AMDGPU can use a "median of 3" instruction here: + // a +/- med3(lo, b, hi) + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto MaxVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + MachineInstrBuilder Hi, Lo; + if (IsAdd) { + auto Zero = MIRBuilder.buildConstant(Ty, 0); + Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); + Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); + } else { + auto NegOne = MIRBuilder.buildConstant(Ty, -1); + Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), + MaxVal); + Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), + MinVal); + } + auto RHSClamped = + MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); + } else { + // uadd.sat(a, b) -> a + umin(~a, b) + // usub.sat(a, b) -> a - umin(a, b) + Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; + auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + bool IsSigned; + bool IsAdd; + unsigned OverflowOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + OverflowOp = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + OverflowOp = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + OverflowOp = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + OverflowOp = TargetOpcode::G_SSUBO; + break; + } + + auto OverflowRes = + MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); + Register Tmp = OverflowRes.getReg(0); + Register Ov = OverflowRes.getReg(1); + MachineInstrBuilder Clamp; + if (IsSigned) { + // sadd.sat(a, b) -> + // {tmp, ov} = saddo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + // ssub.sat(a, b) -> + // {tmp, ov} = ssubo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); + auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); + } else { + // uadd.sat(a, b) -> + // {tmp, ov} = uaddo(a, b) + // ov ? 0xffffffff : tmp + // usub.sat(a, b) -> + // {tmp, ov} = usubo(a, b) + // ov ? 0 : tmp + Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); + } + MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShlSat(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT) && + "Expected shlsat opcode!"); + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + + unsigned BW = Ty.getScalarSizeInBits(); + auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); + auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) + : MIRBuilder.buildLShr(Ty, Result, RHS); + + MachineInstrBuilder SatVal; + if (IsSigned) { + auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); + auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); + auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, + MIRBuilder.buildConstant(Ty, 0)); + SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); + } else { + SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); + } + auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); + MIRBuilder.buildSelect(Res, Ov, SatVal, Result); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -6199,7 +6199,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { const MDString *RegStr = cast<MDString>( cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); - Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); + Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); if (!PhysReg.isValid()) return UnableToLegalize; @@ -6211,63 +6211,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; - unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; - Register Result = MI.getOperand(0).getReg(); - LLT OrigTy = MRI.getType(Result); - auto SizeInBits = OrigTy.getScalarSizeInBits(); - LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); - - auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); - auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); - auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); - unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; - - auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); - auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); - MIRBuilder.buildTrunc(Result, Shifted); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { - // Implement vector G_SELECT in terms of XOR, AND, OR. - Register DstReg = MI.getOperand(0).getReg(); - Register MaskReg = MI.getOperand(1).getReg(); - Register Op1Reg = MI.getOperand(2).getReg(); - Register Op2Reg = MI.getOperand(3).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT MaskTy = MRI.getType(MaskReg); - LLT Op1Ty = MRI.getType(Op1Reg); - if (!DstTy.isVector()) - return UnableToLegalize; - - // Vector selects can have a scalar predicate. If so, splat into a vector and - // finish for later legalization attempts to try again. - if (MaskTy.isScalar()) { - Register MaskElt = MaskReg; - if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) - MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); - // Generate a vector splat idiom to be pattern matched later. 
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(ShufSplat.getReg(0)); - Observer.changedInstr(MI); - return Legalized; - } - - if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { - return UnableToLegalize; - } - - auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); - auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); - auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); - MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); - MI.eraseFromParent(); - return Legalized; -} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; + unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + Register Result = MI.getOperand(0).getReg(); + LLT OrigTy = MRI.getType(Result); + auto SizeInBits = OrigTy.getScalarSizeInBits(); + LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); + + auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); + auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); + auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); + unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); + auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); + MIRBuilder.buildTrunc(Result, Shifted); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { + // Implement vector G_SELECT in terms of XOR, AND, OR. + Register DstReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register Op1Reg = MI.getOperand(2).getReg(); + Register Op2Reg = MI.getOperand(3).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT MaskTy = MRI.getType(MaskReg); + LLT Op1Ty = MRI.getType(Op1Reg); + if (!DstTy.isVector()) + return UnableToLegalize; + + // Vector selects can have a scalar predicate. If so, splat into a vector and + // finish for later legalization attempts to try again. + if (MaskTy.isScalar()) { + Register MaskElt = MaskReg; + if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) + MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); + // Generate a vector splat idiom to be pattern matched later. + auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + return UnableToLegalize; + } + + auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); + auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); + auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); + MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + MI.eraseFromParent(); + return Legalized; +} |
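Finally, an editor's sketch of the XOR/AND/OR expansion lowerSelect emits, shown for one lane. It relies on each mask lane being all-ones or all-zeros, which is why a scalar predicate is sign-extended and splatted first.

#include <cstdint>

uint32_t laneSelect(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  return (Op1 & Mask) | (Op2 & ~Mask); // G_AND, G_NOT + G_AND, G_OR
}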