author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/CodeGen/GlobalISel | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/CodeGen/GlobalISel')
21 files changed, 5189 insertions, 5189 deletions
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp index 2fa208fbfa..24391970d6 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -59,7 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: - case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_EXTRACT: return true; } return false; @@ -367,21 +367,21 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { return *this; } -const GISelInstProfileBuilder & -GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { - LLT Ty = MRI.getType(Reg); - if (Ty.isValid()) - addNodeIDRegType(Ty); - - if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { - if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) - addNodeIDRegType(RB); - else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) - addNodeIDRegType(RC); - } - return *this; -} - +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { + LLT Ty = MRI.getType(Reg); + if (Ty.isValid()) + addNodeIDRegType(Ty); + + if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { + if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) + addNodeIDRegType(RB); + else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) + addNodeIDRegType(RC); + } + return *this; +} + const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { @@ -389,8 +389,8 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( if (!MO.isDef()) addNodeIDRegNum(Reg); - // Profile the register properties. - addNodeIDReg(Reg); + // Profile the register properties. + addNodeIDReg(Reg); assert(!MO.isImplicit() && "Unhandled case"); } else if (MO.isImm()) ID.AddInteger(MO.getImm()); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 2c86f06a60..b0f8a6610d 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -42,14 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, if (MI) { CSEInfo->countOpcodeHit(MI->getOpcode()); auto CurrPos = getInsertPt(); - auto MII = MachineBasicBlock::iterator(MI); - if (MII == CurrPos) { - // Move the insert point ahead of the instruction so any future uses of - // this builder will have the def ready. - setInsertPt(*CurMBB, std::next(MII)); - } else if (!dominates(MI, CurrPos)) { + auto MII = MachineBasicBlock::iterator(MI); + if (MII == CurrPos) { + // Move the insert point ahead of the instruction so any future uses of + // this builder will have the def ready. 
+ setInsertPt(*CurMBB, std::next(MII)); + } else if (!dominates(MI, CurrPos)) { CurMBB->splice(CurrPos, CurMBB, MI); - } + } return MachineInstrBuilder(getMF(), MI); } return MachineInstrBuilder(); @@ -68,11 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, case DstOp::DstType::Ty_RC: B.addNodeIDRegType(Op.getRegClass()); break; - case DstOp::DstType::Ty_Reg: { - // Regs can have LLT&(RB|RC). If those exist, profile them as well. - B.addNodeIDReg(Op.getReg()); - break; - } + case DstOp::DstType::Ty_Reg: { + // Regs can have LLT&(RB|RC). If those exist, profile them as well. + B.addNodeIDReg(Op.getReg()); + break; + } default: B.addNodeIDRegType(Op.getLLTTy(*getMRI())); break; @@ -82,9 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getSrcOpKind()) { - case SrcOp::SrcType::Ty_Imm: - B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm())); - break; + case SrcOp::SrcType::Ty_Imm: + B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm())); + break; case SrcOp::SrcType::Ty_Predicate: B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate())); break; @@ -130,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) { if (DstOps.size() == 1) return true; // always possible to emit copy to just 1 vreg. - return llvm::all_of(DstOps, [](const DstOp &Op) { + return llvm::all_of(DstOps, [](const DstOp &Op) { DstOp::DstType DT = Op.getDstOpKind(); return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC; }); @@ -146,21 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg) return buildCopy(Op.getReg(), MIB.getReg(0)); } - - // If we didn't generate a copy then we're re-using an existing node directly - // instead of emitting any code. Merge the debug location we wanted to emit - // into the instruction we're CSE'ing with. Debug locations arent part of the - // profile so we don't need to recompute it. - if (getDebugLoc()) { - GISelChangeObserver *Observer = getState().Observer; - if (Observer) - Observer->changingInstr(*MIB); - MIB->setDebugLoc( - DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); - if (Observer) - Observer->changedInstr(*MIB); - } - + + // If we didn't generate a copy then we're re-using an existing node directly + // instead of emitting any code. Merge the debug location we wanted to emit + // into the instruction we're CSE'ing with. Debug locations arent part of the + // profile so we don't need to recompute it. + if (getDebugLoc()) { + GISelChangeObserver *Observer = getState().Observer; + if (Observer) + Observer->changingInstr(*MIB); + MIB->setDebugLoc( + DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); + if (Observer) + Observer->changedInstr(*MIB); + } + return MIB; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp index 803e1527a4..ad7c789b2e 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -30,51 +30,51 @@ using namespace llvm; void CallLowering::anchor() {} -/// Helper function which updates \p Flags when \p AttrFn returns true. 
-static void -addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, - const std::function<bool(Attribute::AttrKind)> &AttrFn) { - if (AttrFn(Attribute::SExt)) - Flags.setSExt(); - if (AttrFn(Attribute::ZExt)) - Flags.setZExt(); - if (AttrFn(Attribute::InReg)) - Flags.setInReg(); - if (AttrFn(Attribute::StructRet)) - Flags.setSRet(); - if (AttrFn(Attribute::Nest)) - Flags.setNest(); - if (AttrFn(Attribute::ByVal)) - Flags.setByVal(); - if (AttrFn(Attribute::Preallocated)) - Flags.setPreallocated(); - if (AttrFn(Attribute::InAlloca)) - Flags.setInAlloca(); - if (AttrFn(Attribute::Returned)) - Flags.setReturned(); - if (AttrFn(Attribute::SwiftSelf)) - Flags.setSwiftSelf(); - if (AttrFn(Attribute::SwiftError)) - Flags.setSwiftError(); -} - -ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, - unsigned ArgIdx) const { - ISD::ArgFlagsTy Flags; - addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { - return Call.paramHasAttr(ArgIdx, Attr); - }); - return Flags; -} - -void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, - const AttributeList &Attrs, - unsigned OpIdx) const { - addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { - return Attrs.hasAttribute(OpIdx, Attr); - }); -} - +/// Helper function which updates \p Flags when \p AttrFn returns true. +static void +addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, + const std::function<bool(Attribute::AttrKind)> &AttrFn) { + if (AttrFn(Attribute::SExt)) + Flags.setSExt(); + if (AttrFn(Attribute::ZExt)) + Flags.setZExt(); + if (AttrFn(Attribute::InReg)) + Flags.setInReg(); + if (AttrFn(Attribute::StructRet)) + Flags.setSRet(); + if (AttrFn(Attribute::Nest)) + Flags.setNest(); + if (AttrFn(Attribute::ByVal)) + Flags.setByVal(); + if (AttrFn(Attribute::Preallocated)) + Flags.setPreallocated(); + if (AttrFn(Attribute::InAlloca)) + Flags.setInAlloca(); + if (AttrFn(Attribute::Returned)) + Flags.setReturned(); + if (AttrFn(Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (AttrFn(Attribute::SwiftError)) + Flags.setSwiftError(); +} + +ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const { + ISD::ArgFlagsTy Flags; + addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { + return Call.paramHasAttr(ArgIdx, Attr); + }); + return Flags; +} + +void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const { + addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { + return Attrs.hasAttribute(OpIdx, Attr); + }); +} + bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArrayRef<Register> ResRegs, ArrayRef<ArrayRef<Register>> ArgRegs, @@ -82,45 +82,45 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, std::function<unsigned()> GetCalleeReg) const { CallLoweringInfo Info; const DataLayout &DL = MIRBuilder.getDataLayout(); - MachineFunction &MF = MIRBuilder.getMF(); - bool CanBeTailCalled = CB.isTailCall() && - isInTailCallPosition(CB, MF.getTarget()) && - (MF.getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsString() != "true"); - - CallingConv::ID CallConv = CB.getCallingConv(); - Type *RetTy = CB.getType(); - bool IsVarArg = CB.getFunctionType()->isVarArg(); - - SmallVector<BaseArgInfo, 4> SplitArgs; - getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); - Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); - - if (!Info.CanLowerReturn) { - // Callee 
requires sret demotion. - insertSRetOutgoingArgument(MIRBuilder, CB, Info); - - // The sret demotion isn't compatible with tail-calls, since the sret - // argument points into the caller's stack frame. - CanBeTailCalled = false; - } - + MachineFunction &MF = MIRBuilder.getMF(); + bool CanBeTailCalled = CB.isTailCall() && + isInTailCallPosition(CB, MF.getTarget()) && + (MF.getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); + + CallingConv::ID CallConv = CB.getCallingConv(); + Type *RetTy = CB.getType(); + bool IsVarArg = CB.getFunctionType()->isVarArg(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); + Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); + + if (!Info.CanLowerReturn) { + // Callee requires sret demotion. + insertSRetOutgoingArgument(MIRBuilder, CB, Info); + + // The sret demotion isn't compatible with tail-calls, since the sret + // argument points into the caller's stack frame. + CanBeTailCalled = false; + } + // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); for (auto &Arg : CB.args()) { - ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); - - // If we have an explicit sret argument that is an Instruction, (i.e., it - // might point to function-local memory), we can't meaningfully tail-call. - if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) - CanBeTailCalled = false; - + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. 
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) + CanBeTailCalled = false; + Info.OrigArgs.push_back(OrigArg); ++i; } @@ -133,16 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); - Info.CallConv = CallConv; + Info.CallConv = CallConv; Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CB.isMustTailCall(); - Info.IsTailCall = CanBeTailCalled; - Info.IsVarArg = IsVarArg; + Info.IsTailCall = CanBeTailCalled; + Info.IsVarArg = IsVarArg; return lowerCall(MIRBuilder, Info); } @@ -152,7 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const FuncInfoTy &FuncInfo) const { auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); - addArgFlagsFromAttributes(Flags, Attrs, OpIdx); + addArgFlagsFromAttributes(Flags, Attrs, OpIdx); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); @@ -245,97 +245,97 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { EVT CurVT = EVT::getEVT(Args[i].Ty); - if (CurVT.isSimple() && - !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), - CCValAssign::Full, Args[i], Args[i].Flags[0], - CCInfo)) - continue; - - MVT NewVT = TLI->getRegisterTypeForCallingConv( - F.getContext(), F.getCallingConv(), EVT(CurVT)); - - // If we need to split the type over multiple regs, check it's a scenario - // we currently support. - unsigned NumParts = TLI->getNumRegistersForCallingConv( - F.getContext(), F.getCallingConv(), CurVT); - - if (NumParts == 1) { - // Try to use the register type if we couldn't assign the VT. - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[0], CCInfo)) - return false; - continue; - } - - assert(NumParts > 1); - // For now only handle exact splits. - if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) - return false; - - // For incoming arguments (physregs to vregs), we could have values in - // physregs (or memlocs) which we want to extract and copy to vregs. - // During this, we might have to deal with the LLT being split across - // multiple regs, so we have to record this information for later. - // - // If we have outgoing args, then we have the opposite case. We have a - // vreg with an LLT which we want to assign to a physical location, and - // we might have to record that the value has to be split later. - if (Handler.isIncomingArgumentHandler()) { - // We're handling an incoming arg which is split over multiple regs. - // E.g. passing an s128 on AArch64. - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - Args[i].Regs.clear(); - Args[i].Flags.clear(); - LLT NewLLT = getLLTForMVT(NewVT); - // For each split register, create and assign a vreg that will store - // the incoming component of the larger value. These will later be - // merged to form the final vreg. 
- for (unsigned Part = 0; Part < NumParts; ++Part) { - Register Reg = - MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); - ISD::ArgFlagsTy Flags = OrigFlags; - if (Part == 0) { - Flags.setSplit(); + if (CurVT.isSimple() && + !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), + CCValAssign::Full, Args[i], Args[i].Flags[0], + CCInfo)) + continue; + + MVT NewVT = TLI->getRegisterTypeForCallingConv( + F.getContext(), F.getCallingConv(), EVT(CurVT)); + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + continue; + } + + assert(NumParts > 1); + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); } else { - Flags.setOrigAlign(Align(1)); - if (Part == NumParts - 1) - Flags.setSplitEnd(); + Flags.setOrigAlign(Align(1)); + if (Part == NumParts - 1) + Flags.setSplitEnd(); } - Args[i].Regs.push_back(Reg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[Part], CCInfo)) { - // Still couldn't assign this smaller part type for some reason. - return false; - } - } - } else { - // This type is passed via multiple registers in the calling convention. - // We need to extract the individual parts. - Register LargeReg = Args[i].Regs[0]; - LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); - auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); - assert(Unmerge->getNumOperands() == NumParts + 1); - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - // We're going to replace the regs and flags with the split ones. 
- Args[i].Regs.clear(); - Args[i].Flags.clear(); - for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { - ISD::ArgFlagsTy Flags = OrigFlags; - if (PartIdx == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (PartIdx == NumParts - 1) - Flags.setSplitEnd(); + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; } - Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[PartIdx], CCInfo)) - return false; } + } else { + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. + Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align(1)); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; + } } } @@ -361,239 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo, EVT VAVT = VA.getValVT(); const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); - // Expected to be multiple regs for a single incoming arg. - // There should be Regs.size() ArgLocs per argument. - unsigned NumArgRegs = Args[i].Regs.size(); - - assert((j + (NumArgRegs - 1)) < ArgLocs.size() && - "Too many regs for number of args"); - for (unsigned Part = 0; Part < NumArgRegs; ++Part) { - // There should be Regs.size() ArgLocs per argument. - VA = ArgLocs[j + Part]; - if (VA.isMemLoc()) { - // Don't currently support loading/storing a type that needs to be split - // to the stack. Should be easy, just not implemented yet. - if (NumArgRegs > 1) { - LLVM_DEBUG( - dbgs() - << "Load/store a split arg to/from the stack not implemented yet\n"); - return false; + // Expected to be multiple regs for a single incoming arg. + // There should be Regs.size() ArgLocs per argument. + unsigned NumArgRegs = Args[i].Regs.size(); + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (NumArgRegs > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet\n"); + return false; } - - // FIXME: Use correct address space for pointer size - EVT LocVT = VA.getValVT(); - unsigned MemSize = LocVT == MVT::iPTR ? 
DL.getPointerSize() - : LocVT.getStoreSize(); - unsigned Offset = VA.getLocMemOffset(); - MachinePointerInfo MPO; - Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); - Handler.assignValueToAddress(Args[i], StackAddr, - MemSize, MPO, VA); - continue; - } - - assert(VA.isRegLoc() && "custom loc should have been handled already"); - - // GlobalISel does not currently work for scalable vectors. - if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || - !Handler.isIncomingArgumentHandler()) { - // This is an argument that might have been split. There should be - // Regs.size() ArgLocs per argument. - - // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge - // to the original register after handling all of the parts. - Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); - continue; - } - - // This ArgLoc covers multiple pieces, so we need to split it. - const LLT VATy(VAVT.getSimpleVT()); - Register NewReg = - MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); - Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); - // If it's a vector type, we either need to truncate the elements - // or do an unmerge to get the lower block of elements. - if (VATy.isVector() && - VATy.getNumElements() > OrigVT.getVectorNumElements()) { - // Just handle the case where the VA type is 2 * original type. - if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { - LLVM_DEBUG(dbgs() - << "Incoming promoted vector arg has too many elts"); - return false; + + // FIXME: Use correct address space for pointer size + EVT LocVT = VA.getValVT(); + unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize() + : LocVT.getStoreSize(); + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); + Handler.assignValueToAddress(Args[i], StackAddr, + MemSize, MPO, VA); + continue; + } + + assert(VA.isRegLoc() && "custom loc should have been handled already"); + + // GlobalISel does not currently work for scalable vectors. + if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || + !Handler.isIncomingArgumentHandler()) { + // This is an argument that might have been split. There should be + // Regs.size() ArgLocs per argument. + + // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge + // to the original register after handling all of the parts. + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + continue; + } + + // This ArgLoc covers multiple pieces, so we need to split it. + const LLT VATy(VAVT.getSimpleVT()); + Register NewReg = + MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); + Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); + // If it's a vector type, we either need to truncate the elements + // or do an unmerge to get the lower block of elements. + if (VATy.isVector() && + VATy.getNumElements() > OrigVT.getVectorNumElements()) { + // Just handle the case where the VA type is 2 * original type. 
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { + LLVM_DEBUG(dbgs() + << "Incoming promoted vector arg has too many elts"); + return false; } - auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); - MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); + auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); + MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); } else { - MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); + MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } - } - - // Now that all pieces have been handled, re-pack any arguments into any - // wider, original registers. - if (Handler.isIncomingArgumentHandler()) { - if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { - assert(NumArgRegs >= 2); - - // Merge the split registers into the expected larger result vreg - // of the original call. - MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + } + + // Now that all pieces have been handled, re-pack any arguments into any + // wider, original registers. + if (Handler.isIncomingArgumentHandler()) { + if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { + assert(NumArgRegs >= 2); + + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); } - } - - j += NumArgRegs - 1; - } - - return true; -} - -void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, - ArrayRef<Register> VRegs, Register DemoteReg, - int FI) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const DataLayout &DL = MF.getDataLayout(); - - SmallVector<EVT, 4> SplitVTs; - SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); - - assert(VRegs.size() == SplitVTs.size()); - - unsigned NumValues = SplitVTs.size(); - Align BaseAlign = DL.getPrefTypeAlign(RetTy); - Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); - LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); - - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); - - for (unsigned I = 0; I < NumValues; ++I) { - Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); - auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MRI.getType(VRegs[I]).getSizeInBytes(), - commonAlignment(BaseAlign, Offsets[I])); - MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); - } -} - -void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, - ArrayRef<Register> VRegs, - Register DemoteReg) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const DataLayout &DL = MF.getDataLayout(); - - SmallVector<EVT, 4> SplitVTs; - SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); - - assert(VRegs.size() == SplitVTs.size()); - - unsigned NumValues = SplitVTs.size(); - Align BaseAlign = DL.getPrefTypeAlign(RetTy); - unsigned AS = DL.getAllocaAddrSpace(); - LLT OffsetLLTy = - getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); - - MachinePointerInfo PtrInfo(AS); - - for (unsigned I = 0; I < NumValues; ++I) { - Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); - auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - MRI.getType(VRegs[I]).getSizeInBytes(), - commonAlignment(BaseAlign, Offsets[I])); - MIRBuilder.buildStore(VRegs[I], Addr, *MMO); - } -} - -void 
CallLowering::insertSRetIncomingArgument( - const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, - MachineRegisterInfo &MRI, const DataLayout &DL) const { - unsigned AS = DL.getAllocaAddrSpace(); - DemoteReg = MRI.createGenericVirtualRegister( - LLT::pointer(AS, DL.getPointerSizeInBits(AS))); - - Type *PtrTy = PointerType::get(F.getReturnType(), AS); - - SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); - - // NOTE: Assume that a pointer won't get split into more than one VT. - assert(ValueVTs.size() == 1); - - ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); - setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); - DemoteArg.Flags[0].setSRet(); - SplitArgs.insert(SplitArgs.begin(), DemoteArg); -} - -void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, - const CallBase &CB, - CallLoweringInfo &Info) const { - const DataLayout &DL = MIRBuilder.getDataLayout(); - Type *RetTy = CB.getType(); - unsigned AS = DL.getAllocaAddrSpace(); - LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); - - int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( - DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); - - Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); - ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); - setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); - DemoteArg.Flags[0].setSRet(); - - Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); - Info.DemoteStackIndex = FI; - Info.DemoteRegister = DemoteReg; -} - -bool CallLowering::checkReturn(CCState &CCInfo, - SmallVectorImpl<BaseArgInfo> &Outs, - CCAssignFn *Fn) const { - for (unsigned I = 0, E = Outs.size(); I < E; ++I) { - MVT VT = MVT::getVT(Outs[I].Ty); - if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) + } + + j += NumArgRegs - 1; + } + + return true; +} + +void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, Register DemoteReg, + int FI) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = DL.getPrefTypeAlign(RetTy); + Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); + LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); + + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); + } +} + +void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, + Register DemoteReg) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = 
DL.getPrefTypeAlign(RetTy); + unsigned AS = DL.getAllocaAddrSpace(); + LLT OffsetLLTy = + getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); + + MachinePointerInfo PtrInfo(AS); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildStore(VRegs[I], Addr, *MMO); + } +} + +void CallLowering::insertSRetIncomingArgument( + const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, + MachineRegisterInfo &MRI, const DataLayout &DL) const { + unsigned AS = DL.getAllocaAddrSpace(); + DemoteReg = MRI.createGenericVirtualRegister( + LLT::pointer(AS, DL.getPointerSizeInBits(AS))); + + Type *PtrTy = PointerType::get(F.getReturnType(), AS); + + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); + + // NOTE: Assume that a pointer won't get split into more than one VT. + assert(ValueVTs.size() == 1); + + ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); + DemoteArg.Flags[0].setSRet(); + SplitArgs.insert(SplitArgs.begin(), DemoteArg); +} + +void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, + const CallBase &CB, + CallLoweringInfo &Info) const { + const DataLayout &DL = MIRBuilder.getDataLayout(); + Type *RetTy = CB.getType(); + unsigned AS = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + + int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( + DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); + + Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); + ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); + DemoteArg.Flags[0].setSRet(); + + Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); + Info.DemoteStackIndex = FI; + Info.DemoteRegister = DemoteReg; +} + +bool CallLowering::checkReturn(CCState &CCInfo, + SmallVectorImpl<BaseArgInfo> &Outs, + CCAssignFn *Fn) const { + for (unsigned I = 0, E = Outs.size(); I < E; ++I) { + MVT VT = MVT::getVT(Outs[I].Ty); + if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) return false; } return true; } -void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, - AttributeList Attrs, - SmallVectorImpl<BaseArgInfo> &Outs, - const DataLayout &DL) const { - LLVMContext &Context = RetTy->getContext(); - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - - SmallVector<EVT, 4> SplitVTs; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); - addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); - - for (EVT VT : SplitVTs) { - unsigned NumParts = - TLI->getNumRegistersForCallingConv(Context, CallConv, VT); - MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); - Type *PartTy = EVT(RegVT).getTypeForEVT(Context); - - for (unsigned I = 0; I < NumParts; ++I) { - Outs.emplace_back(PartTy, Flags); - } - } -} - -bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { - const auto &F = MF.getFunction(); - Type *ReturnType = F.getReturnType(); - CallingConv::ID CallConv = F.getCallingConv(); - - SmallVector<BaseArgInfo, 4> SplitArgs; - getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, - MF.getDataLayout()); - return 
canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); -} - +void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, + AttributeList Attrs, + SmallVectorImpl<BaseArgInfo> &Outs, + const DataLayout &DL) const { + LLVMContext &Context = RetTy->getContext(); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + + SmallVector<EVT, 4> SplitVTs; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); + addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); + + for (EVT VT : SplitVTs) { + unsigned NumParts = + TLI->getNumRegistersForCallingConv(Context, CallConv, VT); + MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); + Type *PartTy = EVT(RegVT).getTypeForEVT(Context); + + for (unsigned I = 0; I < NumParts; ++I) { + Outs.emplace_back(PartTy, Flags); + } + } +} + +bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { + const auto &F = MF.getFunction(); + Type *ReturnType = F.getReturnType(); + CallingConv::ID CallConv = F.getCallingConv(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, + MF.getDataLayout()); + return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); +} + bool CallLowering::analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args, CCAssignFn &AssignFnFixed, @@ -611,58 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState, return true; } -bool CallLowering::parametersInCSRMatch( - const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, - const SmallVectorImpl<CCValAssign> &OutLocs, - const SmallVectorImpl<ArgInfo> &OutArgs) const { - for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; - // If it's not a register, it's fine. - if (!ArgLoc.isRegLoc()) - continue; - - MCRegister PhysReg = ArgLoc.getLocReg(); - - // Only look at callee-saved registers. - if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) - continue; - - LLVM_DEBUG( - dbgs() - << "... Call has an argument passed in a callee-saved register.\n"); - - // Check if it was copied from. - const ArgInfo &OutInfo = OutArgs[i]; - - if (OutInfo.Regs.size() > 1) { - LLVM_DEBUG( - dbgs() << "... Cannot handle arguments in multiple registers.\n"); - return false; - } - - // Check if we copy the register, walking through copies from virtual - // registers. Note that getDefIgnoringCopies does not ignore copies from - // physical registers. - MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); - if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { - LLVM_DEBUG( - dbgs() - << "... Parameter was not copied into a VReg, cannot tail call.\n"); - return false; - } - - // Got a copy. Verify that it's the same as the register we want. - Register CopyRHS = RegDef->getOperand(1).getReg(); - if (CopyRHS != PhysReg) { - LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " - "VReg, cannot tail call.\n"); - return false; - } - } - - return true; -} - +bool CallLowering::parametersInCSRMatch( + const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &OutLocs, + const SmallVectorImpl<ArgInfo> &OutArgs) const { + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) + continue; + + MCRegister PhysReg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... 
Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + const ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. + MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. + Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != PhysReg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp index f1071d96e5..86480b47e9 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp @@ -153,8 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, MFChanged |= Changed; } while (Changed); - assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && - "CSEInfo is not consistent. Likely missing calls to " - "observer on mutations")); + assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && + "CSEInfo is not consistent. Likely missing calls to " + "observer on mutations")); return MFChanged; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp index a9353bdfb7..8ea55b6abd 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -44,75 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, (void)this->KB; } -const TargetLowering &CombinerHelper::getTargetLowering() const { - return *Builder.getMF().getSubtarget().getTargetLowering(); -} - -/// \returns The little endian in-memory byte position of byte \p I in a -/// \p ByteWidth bytes wide type. -/// -/// E.g. Given a 4-byte type x, x[0] -> byte 0 -static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { - assert(I < ByteWidth && "I must be in [0, ByteWidth)"); - return I; -} - -/// \returns The big endian in-memory byte position of byte \p I in a -/// \p ByteWidth bytes wide type. -/// -/// E.g. Given a 4-byte type x, x[0] -> byte 3 -static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { - assert(I < ByteWidth && "I must be in [0, ByteWidth)"); - return ByteWidth - I - 1; -} - -/// Given a map from byte offsets in memory to indices in a load/store, -/// determine if that map corresponds to a little or big endian byte pattern. 
-/// -/// \param MemOffset2Idx maps memory offsets to address offsets. -/// \param LowestIdx is the lowest index in \p MemOffset2Idx. -/// -/// \returns true if the map corresponds to a big endian byte pattern, false -/// if it corresponds to a little endian byte pattern, and None otherwise. -/// -/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns -/// are as follows: -/// -/// AddrOffset Little endian Big endian -/// 0 0 3 -/// 1 1 2 -/// 2 2 1 -/// 3 3 0 -static Optional<bool> -isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, - int64_t LowestIdx) { - // Need at least two byte positions to decide on endianness. - unsigned Width = MemOffset2Idx.size(); - if (Width < 2) - return None; - bool BigEndian = true, LittleEndian = true; - for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { - auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); - if (MemOffsetAndIdx == MemOffset2Idx.end()) - return None; - const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; - assert(Idx >= 0 && "Expected non-negative byte offset?"); - LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); - BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); - if (!BigEndian && !LittleEndian) - return None; - } - - assert((BigEndian != LittleEndian) && - "Pattern cannot be both big and little endian!"); - return BigEndian; -} - -bool CombinerHelper::isLegalOrBeforeLegalizer( - const LegalityQuery &Query) const { - return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; -} - +const TargetLowering &CombinerHelper::getTargetLowering() const { + return *Builder.getMF().getSubtarget().getTargetLowering(); +} + +/// \returns The little endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 0 +static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return I; +} + +/// \returns The big endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 3 +static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return ByteWidth - I - 1; +} + +/// Given a map from byte offsets in memory to indices in a load/store, +/// determine if that map corresponds to a little or big endian byte pattern. +/// +/// \param MemOffset2Idx maps memory offsets to address offsets. +/// \param LowestIdx is the lowest index in \p MemOffset2Idx. +/// +/// \returns true if the map corresponds to a big endian byte pattern, false +/// if it corresponds to a little endian byte pattern, and None otherwise. +/// +/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns +/// are as follows: +/// +/// AddrOffset Little endian Big endian +/// 0 0 3 +/// 1 1 2 +/// 2 2 1 +/// 3 3 0 +static Optional<bool> +isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + int64_t LowestIdx) { + // Need at least two byte positions to decide on endianness. 
+ unsigned Width = MemOffset2Idx.size(); + if (Width < 2) + return None; + bool BigEndian = true, LittleEndian = true; + for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { + auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); + if (MemOffsetAndIdx == MemOffset2Idx.end()) + return None; + const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; + assert(Idx >= 0 && "Expected non-negative byte offset?"); + LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); + BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); + if (!BigEndian && !LittleEndian) + return None; + } + + assert((BigEndian != LittleEndian) && + "Pattern cannot be both big and little endian!"); + return BigEndian; +} + +bool CombinerHelper::isLegalOrBeforeLegalizer( + const LegalityQuery &Query) const { + return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; +} + void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); @@ -624,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) return false; - const MachineBasicBlock &MBB = *DefMI.getParent(); - auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { - return &MI == &DefMI || &MI == &UseMI; - }); - if (DefOrUse == MBB.end()) - llvm_unreachable("Block must contain both DefMI and UseMI!"); - return &*DefOrUse == &DefMI; + const MachineBasicBlock &MBB = *DefMI.getParent(); + auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { + return &MI == &DefMI || &MI == &UseMI; + }); + if (DefOrUse == MBB.end()) + llvm_unreachable("Block must contain both DefMI and UseMI!"); + return &*DefOrUse == &DefMI; } bool CombinerHelper::dominates(const MachineInstr &DefMI, @@ -645,101 +645,101 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI, return isPredecessor(DefMI, UseMI); } -bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Register SrcReg = MI.getOperand(1).getReg(); - Register LoadUser = SrcReg; - - if (MRI.getType(SrcReg).isVector()) - return false; - - Register TruncSrc; - if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) - LoadUser = TruncSrc; - - uint64_t SizeInBits = MI.getOperand(2).getImm(); - // If the source is a G_SEXTLOAD from the same bit width, then we don't - // need any extend at all, just a truncate. - if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { - const auto &MMO = **LoadMI->memoperands_begin(); - // If truncating more than the original extended value, abort. - if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) - return false; - if (MMO.getSizeInBits() == SizeInBits) - return true; - } - return false; -} - -bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Builder.setInstrAndDebugLoc(MI); - Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchSextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { +bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - - // Only supports scalars for now. 
- if (MRI.getType(MI.getOperand(0).getReg()).isVector()) - return false; - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); - if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) - return false; - - // If the sign extend extends from a narrower width than the load's width, - // then we can narrow the load width when we combine to a G_SEXTLOAD. - auto &MMO = **LoadDef->memoperands_begin(); - // Don't do this for non-simple loads. - if (MMO.isAtomic() || MMO.isVolatile()) - return false; - - // Avoid widening the load at all. - unsigned NewSizeBits = - std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); - - // Don't generate G_SEXTLOADs with a < 1 byte width. - if (NewSizeBits < 8) - return false; - // Don't bother creating a non-power-2 sextload, it will likely be broken up - // anyway for most targets. - if (!isPowerOf2_32(NewSizeBits)) - return false; - MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); - return true; -} - -bool CombinerHelper::applySextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + Register LoadUser = SrcReg; + + if (MRI.getType(SrcReg).isVector()) + return false; + + Register TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) + LoadUser = TruncSrc; + + uint64_t SizeInBits = MI.getOperand(2).getImm(); + // If the source is a G_SEXTLOAD from the same bit width, then we don't + // need any extend at all, just a truncate. + if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { + const auto &MMO = **LoadMI->memoperands_begin(); + // If truncating more than the original extended value, abort. + if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) + return false; + if (MMO.getSizeInBits() == SizeInBits) + return true; + } + return false; +} + +bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Register LoadReg; - unsigned ScalarSizeBits; - std::tie(LoadReg, ScalarSizeBits) = MatchInfo; - auto *LoadDef = MRI.getVRegDef(LoadReg); - assert(LoadDef && "Expected a load reg"); - - // If we have the following: - // %ld = G_LOAD %ptr, (load 2) - // %ext = G_SEXT_INREG %ld, 8 - // ==> - // %ld = G_SEXTLOAD %ptr (load 1) - - auto &MMO = **LoadDef->memoperands_begin(); - Builder.setInstrAndDebugLoc(MI); - auto &MF = Builder.getMF(); - auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); - Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), - LoadDef->getOperand(1).getReg(), *NewMMO); + Builder.setInstrAndDebugLoc(MI); + Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); return true; } +bool CombinerHelper::matchSextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + + // Only supports scalars for now. + if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + return false; + + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) + return false; + + // If the sign extend extends from a narrower width than the load's width, + // then we can narrow the load width when we combine to a G_SEXTLOAD. 
+ auto &MMO = **LoadDef->memoperands_begin(); + // Don't do this for non-simple loads. + if (MMO.isAtomic() || MMO.isVolatile()) + return false; + + // Avoid widening the load at all. + unsigned NewSizeBits = + std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); + + // Don't generate G_SEXTLOADs with a < 1 byte width. + if (NewSizeBits < 8) + return false; + // Don't bother creating a non-power-2 sextload, it will likely be broken up + // anyway for most targets. + if (!isPowerOf2_32(NewSizeBits)) + return false; + MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); + return true; +} + +bool CombinerHelper::applySextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register LoadReg; + unsigned ScalarSizeBits; + std::tie(LoadReg, ScalarSizeBits) = MatchInfo; + auto *LoadDef = MRI.getVRegDef(LoadReg); + assert(LoadDef && "Expected a load reg"); + + // If we have the following: + // %ld = G_LOAD %ptr, (load 2) + // %ext = G_SEXT_INREG %ld, 8 + // ==> + // %ld = G_SEXTLOAD %ptr (load 1) + + auto &MMO = **LoadDef->memoperands_begin(); + Builder.setInstrAndDebugLoc(MI); + auto &MF = Builder.getMF(); + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); + Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), + LoadDef->getOperand(1).getReg(), *NewMMO); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); @@ -757,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, return false; LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - // FIXME: The following use traversal needs a bail out for patholigical cases. + // FIXME: The following use traversal needs a bail out for patholigical cases. for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -884,11 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; - // For now, no targets actually support these opcodes so don't waste time - // running these unless we're forced to for testing. - if (!ForceLegalIndexing) - return false; - + // For now, no targets actually support these opcodes so don't waste time + // running these unless we're forced to for testing. + if (!ForceLegalIndexing) + return false; + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, MatchInfo.Offset); if (!MatchInfo.IsPre && @@ -941,7 +941,7 @@ void CombinerHelper::applyCombineIndexedLoadStore( LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); } -bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { +bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::G_BR) return false; @@ -956,7 +956,7 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { // The above pattern does not have a fall through to the successor bb2, always // resulting in a branch no matter which path is taken. Here we try to find // and replace that pattern with conditional branch to bb3 and otherwise - // fallthrough to bb2. This is generally better for branch predictors. + // fallthrough to bb2. 
This is generally better for branch predictors. MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator BrIt(MI); @@ -968,36 +968,36 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. Also make sure - // that it isn't the same as the G_BR's target (otherwise, this will loop.) - MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); - return BrCondTarget != MI.getOperand(0).getMBB() && - MBB->isLayoutSuccessor(BrCondTarget); + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } -void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { +void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); - Builder.setInstrAndDebugLoc(*BrCond); - LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); - // FIXME: Does int/fp matter for this? If so, we might need to restrict - // this to i1 only since we might not know for sure what kind of - // compare generated the condition value. - auto True = Builder.buildConstant( - Ty, getICmpTrueVal(getTargetLowering(), false, false)); - auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); - - auto *FallthroughBB = BrCond->getOperand(1).getMBB(); - Observer.changingInstr(MI); - MI.getOperand(0).setMBB(FallthroughBB); - Observer.changedInstr(MI); - - // Change the conditional branch to use the inverted condition and - // new target block. + Builder.setInstrAndDebugLoc(*BrCond); + LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); + // FIXME: Does int/fp matter for this? If so, we might need to restrict + // this to i1 only since we might not know for sure what kind of + // compare generated the condition value. + auto True = Builder.buildConstant( + Ty, getICmpTrueVal(getTargetLowering(), false, false)); + auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); + + auto *FallthroughBB = BrCond->getOperand(1).getMBB(); + Observer.changingInstr(MI); + MI.getOperand(0).setMBB(FallthroughBB); + Observer.changedInstr(MI); + + // Change the conditional branch to use the inverted condition and + // new target block. 
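The inversion itself is an XOR of the i1 condition with the "true" constant (1 under the usual 0/1 boolean contents), which is what the buildXor above emits before the branch targets are swapped. A minimal sketch of that identity:

    #include <cassert>

    int main() {
      for (bool Cond : {false, true}) {
        bool Inverted = Cond ^ true; // XOR with the i1 "true" constant flips the condition
        assert(Inverted == !Cond);
      }
      return 0;
    }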
Observer.changingInstr(*BrCond); - BrCond->getOperand(0).setReg(Xor.getReg(0)); + BrCond->getOperand(0).setReg(Xor.getReg(0)); BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); } @@ -1090,7 +1090,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); + APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -1442,11 +1442,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, } bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { - const unsigned Opc = MI.getOpcode(); + const unsigned Opc = MI.getOpcode(); // This combine is fairly complex so it's not written with a separate // matcher function. - assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || - Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); + assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || + Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; @@ -1457,11 +1457,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); - if (Opc != TargetOpcode::G_MEMSET) { + if (Opc != TargetOpcode::G_MEMSET) { assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); MemOp = *(++MMOIt); SrcAlign = MemOp->getBaseAlign(); @@ -1471,7 +1471,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. 
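Earlier in this hunk, getMemsetValue builds the store value for small G_MEMSETs by splatting one byte across a wider type with APInt::getSplat. A scalar equivalent of that splat (a sketch only; splatByte is an illustrative helper) is multiplication by the 0x01...01 pattern:

    #include <cassert>
    #include <cstdint>

    static uint64_t splatByte(uint8_t B) {
      return static_cast<uint64_t>(B) * 0x0101010101010101ULL; // replicate B into every byte
    }

    int main() {
      assert(splatByte(0xAB) == 0xABABABABABABABABULL);
      assert(splatByte(0x00) == 0);
      return 0;
    }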
- unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); + unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); @@ -1481,78 +1481,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { if (MaxLen && KnownLen > MaxLen) return false; - if (Opc == TargetOpcode::G_MEMCPY) + if (Opc == TargetOpcode::G_MEMCPY) return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (Opc == TargetOpcode::G_MEMMOVE) + if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (Opc == TargetOpcode::G_MEMSET) + if (Opc == TargetOpcode::G_MEMSET) return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); return false; } -static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, - const Register Op, - const MachineRegisterInfo &MRI) { - const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); - if (!MaybeCst) - return None; - - APFloat V = MaybeCst->getValueAPF(); - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode!"); - case TargetOpcode::G_FNEG: { - V.changeSign(); - return V; - } - case TargetOpcode::G_FABS: { - V.clearSign(); - return V; - } - case TargetOpcode::G_FPTRUNC: - break; - case TargetOpcode::G_FSQRT: { - bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(sqrt(V.convertToDouble())); - break; - } - case TargetOpcode::G_FLOG2: { - bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(log2(V.convertToDouble())); - break; - } - } - // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, - // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, - // and `G_FLOG2` reach here. 
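constantFoldFpUnary above evaluates G_FNEG/G_FABS/G_FSQRT/G_FLOG2 on APFloat at compile time. The same folds expressed on plain doubles (a sketch, not the APFloat code path; foldFpUnary is an illustrative helper, and the log2 result is compared with a tolerance rather than exactly):

    #include <cassert>
    #include <cmath>

    static double foldFpUnary(char Op, double V) {
      switch (Op) {
      case 'n': return -V;           // G_FNEG
      case 'a': return std::fabs(V); // G_FABS
      case 's': return std::sqrt(V); // G_FSQRT
      case 'l': return std::log2(V); // G_FLOG2
      default:  return V;            // G_FPTRUNC keeps the value; only the type shrinks
      }
    }

    int main() {
      assert(foldFpUnary('n', 2.0) == -2.0);
      assert(foldFpUnary('a', -3.5) == 3.5);
      assert(foldFpUnary('s', 9.0) == 3.0); // sqrt of an exact square is exact
      assert(std::fabs(foldFpUnary('l', 8.0) - 3.0) < 1e-12);
      return 0;
    }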
- bool Unused; - V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); - return V; -} - -bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); - return Cst.hasValue(); -} - -bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { - assert(Cst.hasValue() && "Optional is unexpectedly empty!"); - Builder.setInstrAndDebugLoc(MI); - MachineFunction &MF = Builder.getMF(); - auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); - Register DstReg = MI.getOperand(0).getReg(); - Builder.buildFConstant(DstReg, *FPVal); - MI.eraseFromParent(); - return true; -} - +static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, + const Register Op, + const MachineRegisterInfo &MRI) { + const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); + if (!MaybeCst) + return None; + + APFloat V = MaybeCst->getValueAPF(); + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_FNEG: { + V.changeSign(); + return V; + } + case TargetOpcode::G_FABS: { + V.clearSign(); + return V; + } + case TargetOpcode::G_FPTRUNC: + break; + case TargetOpcode::G_FSQRT: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(sqrt(V.convertToDouble())); + break; + } + case TargetOpcode::G_FLOG2: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(log2(V.convertToDouble())); + break; + } + } + // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, + // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, + // and `G_FLOG2` reach here. + bool Unused; + V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); + return V; +} + +bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); + return Cst.hasValue(); +} + +bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + assert(Cst.hasValue() && "Optional is unexpectedly empty!"); + Builder.setInstrAndDebugLoc(MI); + MachineFunction &MF = Builder.getMF(); + auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); + Register DstReg = MI.getOperand(0).getReg(); + Builder.buildFConstant(DstReg, *FPVal); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) { // We're trying to match the following pattern: @@ -1581,7 +1581,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return false; // Pass the combined immediate to the apply function. 
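The reassociation matchPtrAddImmedChain is after, (%base + imm1) + imm2 ==> %base + (imm1 + imm2), holds for ordinary pointer arithmetic as long as both results stay in bounds; a quick standalone check:

    #include <cassert>
    #include <cstddef>

    int main() {
      char Buf[64] = {};
      char *P = Buf;
      std::ptrdiff_t Imm1 = 8, Imm2 = 16;
      assert((P + Imm1) + Imm2 == P + (Imm1 + Imm2)); // fold two offsets into one G_PTR_ADD
      return 0;
    }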
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); MatchInfo.Base = Base; return true; } @@ -1599,211 +1599,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { - // We're trying to match the following pattern with any of - // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: - // %t1 = SHIFT %base, G_CONSTANT imm1 - // %root = SHIFT %t1, G_CONSTANT imm2 - // --> - // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) - - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || - Opcode == TargetOpcode::G_USHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); - - Register Shl2 = MI.getOperand(1).getReg(); - Register Imm1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); - if (!MaybeImmVal) - return false; - - MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); - if (Shl2Def->getOpcode() != Opcode) - return false; - - Register Base = Shl2Def->getOperand(1).getReg(); - Register Imm2 = Shl2Def->getOperand(2).getReg(); - auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); - if (!MaybeImm2Val) - return false; - - // Pass the combined immediate to the apply function. - MatchInfo.Imm = - (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); - MatchInfo.Reg = Base; - - // There is no simple replacement for a saturating unsigned left shift that - // exceeds the scalar size. - if (Opcode == TargetOpcode::G_USHLSAT && - MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) - return false; - - return true; -} - -bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || - Opcode == TargetOpcode::G_USHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); - - Builder.setInstrAndDebugLoc(MI); - LLT Ty = MRI.getType(MI.getOperand(1).getReg()); - unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); - auto Imm = MatchInfo.Imm; - - if (Imm >= ScalarSizeInBits) { - // Any logical shift that exceeds scalar size will produce zero. - if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { - Builder.buildConstant(MI.getOperand(0), 0); - MI.eraseFromParent(); - return true; - } - // Arithmetic shift and saturating signed left shift have no effect beyond - // scalar size. - Imm = ScalarSizeInBits - 1; - } - - LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); - Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(MatchInfo.Reg); - MI.getOperand(2).setReg(NewImm); - Observer.changedInstr(MI); - return true; -} - -bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { - // We're trying to match the following pattern with any of - // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination - // with any of G_AND/G_OR/G_XOR logic instructions. 
- // %t1 = SHIFT %X, G_CONSTANT C0 - // %t2 = LOGIC %t1, %Y - // %root = SHIFT %t2, G_CONSTANT C1 - // --> - // %t3 = SHIFT %X, G_CONSTANT (C0+C1) - // %t4 = SHIFT %Y, G_CONSTANT C1 - // %root = LOGIC %t3, %t4 - unsigned ShiftOpcode = MI.getOpcode(); - assert((ShiftOpcode == TargetOpcode::G_SHL || - ShiftOpcode == TargetOpcode::G_ASHR || - ShiftOpcode == TargetOpcode::G_LSHR || - ShiftOpcode == TargetOpcode::G_USHLSAT || - ShiftOpcode == TargetOpcode::G_SSHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); - - // Match a one-use bitwise logic op. - Register LogicDest = MI.getOperand(1).getReg(); - if (!MRI.hasOneNonDBGUse(LogicDest)) - return false; - - MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); - unsigned LogicOpcode = LogicMI->getOpcode(); - if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && - LogicOpcode != TargetOpcode::G_XOR) - return false; - - // Find a matching one-use shift by constant. - const Register C1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); - if (!MaybeImmVal) - return false; - - const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); - - auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { - // Shift should match previous one and should be a one-use. - if (MI->getOpcode() != ShiftOpcode || - !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) - return false; - - // Must be a constant. - auto MaybeImmVal = - getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); - if (!MaybeImmVal) - return false; - - ShiftVal = MaybeImmVal->Value.getSExtValue(); - return true; - }; - - // Logic ops are commutative, so check each operand for a match. - Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); - MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); - Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); - MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); - uint64_t C0Val; - - if (matchFirstShift(LogicMIOp1, C0Val)) { - MatchInfo.LogicNonShiftReg = LogicMIReg2; - MatchInfo.Shift2 = LogicMIOp1; - } else if (matchFirstShift(LogicMIOp2, C0Val)) { - MatchInfo.LogicNonShiftReg = LogicMIReg1; - MatchInfo.Shift2 = LogicMIOp2; - } else - return false; - - MatchInfo.ValSum = C0Val + C1Val; - - // The fold is not valid if the sum of the shift values exceeds bitwidth. 
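The rewrite sketched in that comment is valid because a constant shift distributes over bitwise logic: ((X << C0) op Y) << C1 == (X << (C0 + C1)) op (Y << C1) for AND/OR/XOR, provided C0 + C1 stays below the bit width. A standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x1234, Y = 0x00F0;
      unsigned C0 = 3, C1 = 5; // C0 + C1 must stay below the 32-bit width
      assert((((X << C0) | Y) << C1) == ((X << (C0 + C1)) | (Y << C1)));
      assert((((X << C0) ^ Y) << C1) == ((X << (C0 + C1)) ^ (Y << C1)));
      assert((((X << C0) & Y) << C1) == ((X << (C0 + C1)) & (Y << C1)));
      return 0;
    }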
- if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) - return false; - - MatchInfo.Logic = LogicMI; - return true; -} - -bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || - Opcode == TargetOpcode::G_SSHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); - - LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); - LLT DestType = MRI.getType(MI.getOperand(0).getReg()); - Builder.setInstrAndDebugLoc(MI); - - Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); - - Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); - Register Shift1 = - Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); - - Register Shift2Const = MI.getOperand(2).getReg(); - Register Shift2 = Builder - .buildInstr(Opcode, {DestType}, - {MatchInfo.LogicNonShiftReg, Shift2Const}) - .getReg(0); - - Register Dest = MI.getOperand(0).getReg(); - Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); - - // These were one use so it's safe to remove them. - MatchInfo.Shift2->eraseFromParent(); - MatchInfo.Logic->eraseFromParent(); - - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: + // %t1 = SHIFT %base, G_CONSTANT imm1 + // %root = SHIFT %t1, G_CONSTANT imm2 + // --> + // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) + + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Register Shl2 = MI.getOperand(1).getReg(); + Register Imm1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + if (!MaybeImmVal) + return false; + + MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); + if (Shl2Def->getOpcode() != Opcode) + return false; + + Register Base = Shl2Def->getOperand(1).getReg(); + Register Imm2 = Shl2Def->getOperand(2).getReg(); + auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + if (!MaybeImm2Val) + return false; + + // Pass the combined immediate to the apply function. + MatchInfo.Imm = + (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Reg = Base; + + // There is no simple replacement for a saturating unsigned left shift that + // exceeds the scalar size. 
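matchShiftImmedChain/applyShiftImmedChain above rely on the basic identity that two constant shifts of the same kind collapse into one shift by the summed amount while that sum stays below the scalar size (the overflow cases are handled separately in the apply). A quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Base = 0xDEADBEEF;
      unsigned Imm1 = 4, Imm2 = 7; // Imm1 + Imm2 < 32
      assert(((Base >> Imm1) >> Imm2) == (Base >> (Imm1 + Imm2))); // logical shift right
      assert(((Base << Imm1) << Imm2) == (Base << (Imm1 + Imm2))); // shift left
      return 0;
    }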
+ if (Opcode == TargetOpcode::G_USHLSAT && + MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) + return false; + + return true; +} + +bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Builder.setInstrAndDebugLoc(MI); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); + auto Imm = MatchInfo.Imm; + + if (Imm >= ScalarSizeInBits) { + // Any logical shift that exceeds scalar size will produce zero. + if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { + Builder.buildConstant(MI.getOperand(0), 0); + MI.eraseFromParent(); + return true; + } + // Arithmetic shift and saturating signed left shift have no effect beyond + // scalar size. + Imm = ScalarSizeInBits - 1; + } + + LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); + Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(MatchInfo.Reg); + MI.getOperand(2).setReg(NewImm); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination + // with any of G_AND/G_OR/G_XOR logic instructions. + // %t1 = SHIFT %X, G_CONSTANT C0 + // %t2 = LOGIC %t1, %Y + // %root = SHIFT %t2, G_CONSTANT C1 + // --> + // %t3 = SHIFT %X, G_CONSTANT (C0+C1) + // %t4 = SHIFT %Y, G_CONSTANT C1 + // %root = LOGIC %t3, %t4 + unsigned ShiftOpcode = MI.getOpcode(); + assert((ShiftOpcode == TargetOpcode::G_SHL || + ShiftOpcode == TargetOpcode::G_ASHR || + ShiftOpcode == TargetOpcode::G_LSHR || + ShiftOpcode == TargetOpcode::G_USHLSAT || + ShiftOpcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + // Match a one-use bitwise logic op. + Register LogicDest = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LogicDest)) + return false; + + MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); + unsigned LogicOpcode = LogicMI->getOpcode(); + if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && + LogicOpcode != TargetOpcode::G_XOR) + return false; + + // Find a matching one-use shift by constant. + const Register C1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); + if (!MaybeImmVal) + return false; + + const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); + + auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { + // Shift should match previous one and should be a one-use. + if (MI->getOpcode() != ShiftOpcode || + !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) + return false; + + // Must be a constant. + auto MaybeImmVal = + getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); + if (!MaybeImmVal) + return false; + + ShiftVal = MaybeImmVal->Value.getSExtValue(); + return true; + }; + + // Logic ops are commutative, so check each operand for a match. 
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); + MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); + Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); + MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); + uint64_t C0Val; + + if (matchFirstShift(LogicMIOp1, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg2; + MatchInfo.Shift2 = LogicMIOp1; + } else if (matchFirstShift(LogicMIOp2, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg1; + MatchInfo.Shift2 = LogicMIOp2; + } else + return false; + + MatchInfo.ValSum = C0Val + C1Val; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) + return false; + + MatchInfo.Logic = LogicMI; + return true; +} + +bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || + Opcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); + LLT DestType = MRI.getType(MI.getOperand(0).getReg()); + Builder.setInstrAndDebugLoc(MI); + + Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); + + Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); + Register Shift1 = + Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); + + Register Shift2Const = MI.getOperand(2).getReg(); + Register Shift2 = Builder + .buildInstr(Opcode, {DestType}, + {MatchInfo.LogicNonShiftReg, Shift2Const}) + .getReg(0); + + Register Dest = MI.getOperand(0).getReg(); + Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); + + // These were one use so it's safe to remove them. + MatchInfo.Shift2->eraseFromParent(); + MatchInfo.Logic->eraseFromParent(); + + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); - if (!MaybeImmVal) + if (!MaybeImmVal) return false; - - ShiftVal = MaybeImmVal->Value.exactLogBase2(); - return (static_cast<int32_t>(ShiftVal) != -1); + + ShiftVal = MaybeImmVal->Value.exactLogBase2(); + return (static_cast<int32_t>(ShiftVal) != -1); } bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, @@ -1819,254 +1819,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, return true; } -// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source -bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, - RegisterImmPair &MatchData) { - assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); - - Register LHS = MI.getOperand(1).getReg(); - - Register ExtSrc; - if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && - !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && - !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) - return false; - - // TODO: Should handle vector splat. - Register RHS = MI.getOperand(2).getReg(); - auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); - if (!MaybeShiftAmtVal) - return false; - - if (LI) { - LLT SrcTy = MRI.getType(ExtSrc); - - // We only really care about the legality with the shifted value. 
We can - // pick any type the constant shift amount, so ask the target what to - // use. Otherwise we would have to guess and hope it is reported as legal. - LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) - return false; - } - - int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); - MatchData.Reg = ExtSrc; - MatchData.Imm = ShiftAmt; - - unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); - return MinLeadingZeros >= ShiftAmt; -} - -bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, - const RegisterImmPair &MatchData) { - Register ExtSrcReg = MatchData.Reg; - int64_t ShiftAmtVal = MatchData.Imm; - - LLT ExtSrcTy = MRI.getType(ExtSrcReg); - Builder.setInstrAndDebugLoc(MI); - auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); - auto NarrowShift = - Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); - Builder.buildZExt(MI.getOperand(0), NarrowShift); - MI.eraseFromParent(); - return true; -} - -static Register peekThroughBitcast(Register Reg, - const MachineRegisterInfo &MRI) { - while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) - ; - - return Reg; -} - -bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - Register SrcReg = - peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); - - MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); - if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && - SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && - SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) - return false; - - // Check the source type of the merge. - LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); - LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); - bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); - if (SrcMergeTy != Dst0Ty && !SameSize) - return false; - // They are the same now (modulo a bitcast). - // We can collect all the src registers. 
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; - ++Idx) - Operands.push_back(SrcInstr->getOperand(Idx).getReg()); - return true; -} - -bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - assert((MI.getNumOperands() - 1 == Operands.size()) && - "Not enough operands to replace all defs"); - unsigned NumElems = MI.getNumOperands() - 1; - - LLT SrcTy = MRI.getType(Operands[0]); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - bool CanReuseInputDirectly = DstTy == SrcTy; - Builder.setInstrAndDebugLoc(MI); - for (unsigned Idx = 0; Idx < NumElems; ++Idx) { - Register DstReg = MI.getOperand(Idx).getReg(); - Register SrcReg = Operands[Idx]; - if (CanReuseInputDirectly) - replaceRegWith(MRI, DstReg, SrcReg); - else - Builder.buildCast(DstReg, SrcReg); - } - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { - unsigned SrcIdx = MI.getNumOperands() - 1; - Register SrcReg = MI.getOperand(SrcIdx).getReg(); - MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); - if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && - SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) - return false; - // Break down the big constant in smaller ones. - const MachineOperand &CstVal = SrcInstr->getOperand(1); - APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT - ? CstVal.getCImm()->getValue() - : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - - LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned ShiftAmt = Dst0Ty.getSizeInBits(); - // Unmerge a constant. - for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { - Csts.emplace_back(Val.trunc(ShiftAmt)); - Val = Val.lshr(ShiftAmt); - } - - return true; -} - -bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - assert((MI.getNumOperands() - 1 == Csts.size()) && - "Not enough operands to replace all defs"); - unsigned NumElems = MI.getNumOperands() - 1; - Builder.setInstrAndDebugLoc(MI); - for (unsigned Idx = 0; Idx < NumElems; ++Idx) { - Register DstReg = MI.getOperand(Idx).getReg(); - Builder.buildConstant(DstReg, Csts[Idx]); - } - - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - // Check that all the lanes are dead except the first one. - for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { - if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) - return false; - } - return true; -} - -bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { - Builder.setInstrAndDebugLoc(MI); - Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); - // Truncating a vector is going to truncate every single lane, - // whereas we want the full lowbits. - // Do the operation on a scalar instead. 
- LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.isVector()) - SrcReg = - Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); - - Register Dst0Reg = MI.getOperand(0).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - if (Dst0Ty.isVector()) { - auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); - Builder.buildCast(Dst0Reg, MIB); - } else - Builder.buildTrunc(Dst0Reg, SrcReg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - Register Dst0Reg = MI.getOperand(0).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - // G_ZEXT on vector applies to each lane, so it will - // affect all destinations. Therefore we won't be able - // to simplify the unmerge to just the first definition. - if (Dst0Ty.isVector()) - return false; - Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.isVector()) - return false; - - Register ZExtSrcReg; - if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) - return false; - - // Finally we can replace the first definition with - // a zext of the source if the definition is big enough to hold - // all of ZExtSrc bits. - LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); - return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); -} - -bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - - Register Dst0Reg = MI.getOperand(0).getReg(); - - MachineInstr *ZExtInstr = - MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); - assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && - "Expecting a G_ZEXT"); - - Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); - - Builder.setInstrAndDebugLoc(MI); - - if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { - Builder.buildZExt(Dst0Reg, ZExtSrcReg); - } else { - assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && - "ZExt src doesn't fit in destination"); - replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); - } - - Register ZeroReg; - for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { - if (!ZeroReg) - ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); - replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); - } - MI.eraseFromParent(); - return true; -} - +// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source +bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, + RegisterImmPair &MatchData) { + assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); + + Register LHS = MI.getOperand(1).getReg(); + + Register ExtSrc; + if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) + return false; + + // TODO: Should handle vector splat. + Register RHS = MI.getOperand(2).getReg(); + auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); + if (!MaybeShiftAmtVal) + return false; + + if (LI) { + LLT SrcTy = MRI.getType(ExtSrc); + + // We only really care about the legality with the shifted value. We can + // pick any type the constant shift amount, so ask the target what to + // use. Otherwise we would have to guess and hope it is reported as legal. 
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) + return false; + } + + int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); + MatchData.Reg = ExtSrc; + MatchData.Imm = ShiftAmt; + + unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); + return MinLeadingZeros >= ShiftAmt; +} + +bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, + const RegisterImmPair &MatchData) { + Register ExtSrcReg = MatchData.Reg; + int64_t ShiftAmtVal = MatchData.Imm; + + LLT ExtSrcTy = MRI.getType(ExtSrcReg); + Builder.setInstrAndDebugLoc(MI); + auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); + auto NarrowShift = + Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); + Builder.buildZExt(MI.getOperand(0), NarrowShift); + MI.eraseFromParent(); + return true; +} + +static Register peekThroughBitcast(Register Reg, + const MachineRegisterInfo &MRI) { + while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) + ; + + return Reg; +} + +bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register SrcReg = + peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); + + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && + SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && + SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) + return false; + + // Check the source type of the merge. + LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); + if (SrcMergeTy != Dst0Ty && !SameSize) + return false; + // They are the same now (modulo a bitcast). + // We can collect all the src registers. + for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; + ++Idx) + Operands.push_back(SrcInstr->getOperand(Idx).getReg()); + return true; +} + +bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Operands.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + + LLT SrcTy = MRI.getType(Operands[0]); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + bool CanReuseInputDirectly = DstTy == SrcTy; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Register SrcReg = Operands[Idx]; + if (CanReuseInputDirectly) + replaceRegWith(MRI, DstReg, SrcReg); + else + Builder.buildCast(DstReg, SrcReg); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && + SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + // Break down the big constant in smaller ones. 
+ const MachineOperand &CstVal = SrcInstr->getOperand(1); + APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT + ? CstVal.getCImm()->getValue() + : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned ShiftAmt = Dst0Ty.getSizeInBits(); + // Unmerge a constant. + for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { + Csts.emplace_back(Val.trunc(ShiftAmt)); + Val = Val.lshr(ShiftAmt); + } + + return true; +} + +bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Csts.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Builder.buildConstant(DstReg, Csts[Idx]); + } + + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + // Check that all the lanes are dead except the first one. + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) + return false; + } + return true; +} + +bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + Builder.setInstrAndDebugLoc(MI); + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + // Truncating a vector is going to truncate every single lane, + // whereas we want the full lowbits. + // Do the operation on a scalar instead. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + SrcReg = + Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + if (Dst0Ty.isVector()) { + auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); + Builder.buildCast(Dst0Reg, MIB); + } else + Builder.buildTrunc(Dst0Reg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + // G_ZEXT on vector applies to each lane, so it will + // affect all destinations. Therefore we won't be able + // to simplify the unmerge to just the first definition. + if (Dst0Ty.isVector()) + return false; + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + return false; + + Register ZExtSrcReg; + if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) + return false; + + // Finally we can replace the first definition with + // a zext of the source if the definition is big enough to hold + // all of ZExtSrc bits. 
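The matchCombineUnmergeConstant loop earlier in this hunk splits a wide constant into destination-sized pieces with truncate plus logical shift right. The same slicing on a plain 64-bit value (a sketch, not the APInt code):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      uint64_t Val = 0x1122334455667788ULL;
      std::vector<uint16_t> Pieces;
      for (int I = 0; I < 4; ++I) {
        Pieces.push_back(static_cast<uint16_t>(Val)); // trunc to the piece width
        Val >>= 16;                                   // lshr by the piece width
      }
      assert(Pieces[0] == 0x7788 && Pieces[3] == 0x1122); // low piece first, high piece last
      return 0;
    }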
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); +} + +bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + + Register Dst0Reg = MI.getOperand(0).getReg(); + + MachineInstr *ZExtInstr = + MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); + assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && + "Expecting a G_ZEXT"); + + Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + + Builder.setInstrAndDebugLoc(MI); + + if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { + Builder.buildZExt(Dst0Reg, ZExtSrcReg); + } else { + assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && + "ZExt src doesn't fit in destination"); + replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); + } + + Register ZeroReg; + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!ZeroReg) + ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); + replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { @@ -2088,7 +2088,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, if (!MaybeImmVal) return false; - ShiftVal = MaybeImmVal->Value.getSExtValue(); + ShiftVal = MaybeImmVal->Value.getSExtValue(); return ShiftVal >= Size / 2 && ShiftVal < Size; } @@ -2177,296 +2177,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, return false; } -bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, - m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); -} - -bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstr(MI); - Builder.buildCopy(DstReg, Reg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); -} - -bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstr(MI); - Builder.buildZExtOrTrunc(DstReg, Reg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { - assert(MI.getOpcode() == TargetOpcode::G_ADD); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT IntTy = MRI.getType(LHS); - - // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the - // instruction. 
- PtrReg.second = false; - for (Register SrcReg : {LHS, RHS}) { - if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { - // Don't handle cases where the integer is implicitly converted to the - // pointer width. - LLT PtrTy = MRI.getType(PtrReg.first); - if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) - return true; - } - - PtrReg.second = true; - } - - return false; -} - -bool CombinerHelper::applyCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { - Register Dst = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - - const bool DoCommute = PtrReg.second; - if (DoCommute) - std::swap(LHS, RHS); - LHS = PtrReg.first; - - LLT PtrTy = MRI.getType(LHS); - - Builder.setInstrAndDebugLoc(MI); - auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); - Builder.buildPtrToInt(Dst, PtrAdd); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - - if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { - int64_t Cst; - if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { - NewCst = Cst + *RHSCst; - return true; - } - } - - return false; -} - -bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register Dst = MI.getOperand(0).getReg(); - - Builder.setInstrAndDebugLoc(MI); - Builder.buildConstant(Dst, NewCst); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - return mi_match(SrcReg, MRI, - m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); -} - -bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); - Register DstReg = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, Reg); - return true; -} - -bool CombinerHelper::matchCombineExtOfExt( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || - MI.getOpcode() == TargetOpcode::G_SEXT || - MI.getOpcode() == TargetOpcode::G_ZEXT) && - "Expected a G_[ASZ]EXT"); - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 
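Of the ext-of-ext cases listed in that comment, the only non-obvious one is sext(zext x): the inner zext leaves the newly introduced high bits zero, so the outer sext sees a non-negative value and the pair behaves like a single zext. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 0xF0;                                  // 240; top bit set as an i8
      uint16_t Zext16 = X;                               // zext i8 -> i16
      int32_t SextOfZext = static_cast<int16_t>(Zext16); // sext i16 -> i32
      int32_t ZextOnly = X;                              // zext i8 -> i32
      assert(SextOfZext == 240 && SextOfZext == ZextOnly);
      return 0;
    }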
- unsigned Opc = MI.getOpcode(); - unsigned SrcOpc = SrcMI->getOpcode(); - if (Opc == SrcOpc || - (Opc == TargetOpcode::G_ANYEXT && - (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || - (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { - MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); - return true; - } - return false; -} - -bool CombinerHelper::applyCombineExtOfExt( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || - MI.getOpcode() == TargetOpcode::G_SEXT || - MI.getOpcode() == TargetOpcode::G_ZEXT) && - "Expected a G_[ASZ]EXT"); - - Register Reg = std::get<0>(MatchInfo); - unsigned SrcExtOp = std::get<1>(MatchInfo); - - // Combine exts with the same opcode. - if (MI.getOpcode() == SrcExtOp) { - Observer.changingInstr(MI); - MI.getOperand(1).setReg(Reg); - Observer.changedInstr(MI); - return true; - } - - // Combine: - // - anyext([sz]ext x) to [sz]ext x - // - sext(zext x) to zext x - if (MI.getOpcode() == TargetOpcode::G_ANYEXT || - (MI.getOpcode() == TargetOpcode::G_SEXT && - SrcExtOp == TargetOpcode::G_ZEXT)) { - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstrAndDebugLoc(MI); - Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); - MI.eraseFromParent(); - return true; - } - - return false; -} - -bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - - Builder.setInstrAndDebugLoc(MI); - Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, - MI.getFlags()); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); -} - -bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Src = MI.getOperand(1).getReg(); - Register AbsSrc; - return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); -} - -bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Register Dst = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, Dst, Src); - return true; -} - -bool CombinerHelper::matchCombineTruncOfExt( - MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - unsigned SrcOpc = SrcMI->getOpcode(); - if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || - SrcOpc == TargetOpcode::G_ZEXT) { - MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); - return true; - } - return false; -} - -bool CombinerHelper::applyCombineTruncOfExt( - MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register SrcReg = MatchInfo.first; - unsigned SrcExtOp = MatchInfo.second; - Register DstReg = MI.getOperand(0).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(DstReg); - if (SrcTy == DstTy) { - 
MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, SrcReg); - return true; - } - Builder.setInstrAndDebugLoc(MI); - if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) - Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); - else - Builder.buildTrunc(DstReg, SrcReg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineTruncOfShl( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register ShiftSrc; - Register ShiftAmt; - - if (MRI.hasOneNonDBGUse(SrcReg) && - mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && - isLegalOrBeforeLegalizer( - {TargetOpcode::G_SHL, - {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { - KnownBits Known = KB->getKnownBits(ShiftAmt); - unsigned Size = DstTy.getSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { - MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); - return true; - } - } - return false; -} - -bool CombinerHelper::applyCombineTruncOfShl( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - - Register ShiftSrc = MatchInfo.first; - Register ShiftAmt = MatchInfo.second; - Builder.setInstrAndDebugLoc(MI); - auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); - Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, + m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); +} + +bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildCopy(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); +} + +bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildZExtOrTrunc(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + assert(MI.getOpcode() == TargetOpcode::G_ADD); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT IntTy = MRI.getType(LHS); + + // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the + // instruction. 
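matchCombineAddP2IToPtrAdd rewrites (G_PTRTOINT p) + x into G_PTRTOINT (G_PTR_ADD p, x), which is value-preserving on the usual flat-address-space targets where pointer addition and integer addition agree. A standalone check under that assumption:

    #include <cassert>
    #include <cstdint>

    int main() {
      int Buf[8] = {};
      char *P = reinterpret_cast<char *>(Buf);
      uintptr_t Off = 12;                                          // stays inside Buf
      uintptr_t ViaIntAdd = reinterpret_cast<uintptr_t>(P) + Off;  // add on the ptrtoint result
      uintptr_t ViaPtrAdd = reinterpret_cast<uintptr_t>(P + Off);  // ptrtoint of the ptr_add
      assert(ViaIntAdd == ViaPtrAdd);
      return 0;
    }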
+ PtrReg.second = false; + for (Register SrcReg : {LHS, RHS}) { + if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { + // Don't handle cases where the integer is implicitly converted to the + // pointer width. + LLT PtrTy = MRI.getType(PtrReg.first); + if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) + return true; + } + + PtrReg.second = true; + } + + return false; +} + +bool CombinerHelper::applyCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + Register Dst = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + const bool DoCommute = PtrReg.second; + if (DoCommute) + std::swap(LHS, RHS); + LHS = PtrReg.first; + + LLT PtrTy = MRI.getType(LHS); + + Builder.setInstrAndDebugLoc(MI); + auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); + Builder.buildPtrToInt(Dst, PtrAdd); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); + + if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { + int64_t Cst; + if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { + NewCst = Cst + *RHSCst; + return true; + } + } + + return false; +} + +bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register Dst = MI.getOperand(0).getReg(); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildConstant(Dst, NewCst); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + return mi_match(SrcReg, MRI, + m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); +} + +bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, Reg); + return true; +} + +bool CombinerHelper::matchCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 
+ unsigned Opc = MI.getOpcode(); + unsigned SrcOpc = SrcMI->getOpcode(); + if (Opc == SrcOpc || + (Opc == TargetOpcode::G_ANYEXT && + (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || + (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { + MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + + Register Reg = std::get<0>(MatchInfo); + unsigned SrcExtOp = std::get<1>(MatchInfo); + + // Combine exts with the same opcode. + if (MI.getOpcode() == SrcExtOp) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Reg); + Observer.changedInstr(MI); + return true; + } + + // Combine: + // - anyext([sz]ext x) to [sz]ext x + // - sext(zext x) to zext x + if (MI.getOpcode() == TargetOpcode::G_ANYEXT || + (MI.getOpcode() == TargetOpcode::G_SEXT && + SrcExtOp == TargetOpcode::G_ZEXT)) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, + MI.getFlags()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); +} + +bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Src = MI.getOperand(1).getReg(); + Register AbsSrc; + return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); +} + +bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Register Dst = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, Dst, Src); + return true; +} + +bool CombinerHelper::matchCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + unsigned SrcOpc = SrcMI->getOpcode(); + if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || + SrcOpc == TargetOpcode::G_ZEXT) { + MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg = MatchInfo.first; + unsigned SrcExtOp = MatchInfo.second; + Register DstReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); + if (SrcTy == DstTy) { + 
MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, SrcReg); + return true; + } + Builder.setInstrAndDebugLoc(MI); + if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) + Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); + else + Builder.buildTrunc(DstReg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register ShiftSrc; + Register ShiftAmt; + + if (MRI.hasOneNonDBGUse(SrcReg) && + mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && + isLegalOrBeforeLegalizer( + {TargetOpcode::G_SHL, + {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { + KnownBits Known = KB->getKnownBits(ShiftAmt); + unsigned Size = DstTy.getSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); + return true; + } + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + + Register ShiftSrc = MatchInfo.first; + Register ShiftAmt = MatchInfo.second; + Builder.setInstrAndDebugLoc(MI); + auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); + Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && @@ -2493,22 +2493,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) { MRI); } -bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SELECT); - return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), - MRI); -} - -bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { - assert(MI.getOpcode() == TargetOpcode::G_SELECT); - if (auto MaybeCstCmp = - getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { - OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2; - return true; - } - return false; -} - +bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), + MRI); +} + +bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + if (auto MaybeCstCmp = + getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { + OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; + return true; + } + return false; +} + bool CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); return true; @@ -2605,16 +2605,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, return true; } -bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, - Register Replacement) { - assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); - Register OldReg = MI.getOperand(0).getReg(); - assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); - MI.eraseFromParent(); - replaceRegWith(MRI, OldReg, Replacement); - return true; -} - +bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, + Register Replacement) { + assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); + Register OldReg = MI.getOperand(0).getReg(); + assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); + MI.eraseFromParent(); + replaceRegWith(MRI, OldReg, Replacement); + return true; +} + bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Match (cond ? x : x) @@ -2635,18 +2635,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { MRI); } -bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); - return MO.isReg() && - getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); -} - -bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, - unsigned OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); - return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); -} - +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); +} + +bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); +} + bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); @@ -2682,7 +2682,7 @@ bool CombinerHelper::matchSimplifyAddToSub( // ((0-A) + B) -> B - A // (A + (0-B)) -> A - B auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { - if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) + if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) return false; NewLHS = MaybeNewLHS; return true; @@ -2691,67 +2691,67 @@ bool CombinerHelper::matchSimplifyAddToSub( return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); } -bool CombinerHelper::matchCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && - "Invalid opcode"); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); - unsigned NumElts = DstTy.getNumElements(); - // If this MI is part of a sequence of insert_vec_elts, then - // don't do the combine in the middle of the sequence. 
- if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == - TargetOpcode::G_INSERT_VECTOR_ELT) - return false; - MachineInstr *CurrInst = &MI; - MachineInstr *TmpInst; - int64_t IntImm; - Register TmpReg; - MatchInfo.resize(NumElts); - while (mi_match( - CurrInst->getOperand(0).getReg(), MRI, - m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { - if (IntImm >= NumElts) - return false; - if (!MatchInfo[IntImm]) - MatchInfo[IntImm] = TmpReg; - CurrInst = TmpInst; - } - // Variable index. - if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) - return false; - if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { - for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { - if (!MatchInfo[I - 1].isValid()) - MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); - } - return true; - } - // If we didn't end in a G_IMPLICIT_DEF, bail out. - return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; -} - -bool CombinerHelper::applyCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { - Builder.setInstr(MI); - Register UndefReg; - auto GetUndef = [&]() { - if (UndefReg) - return UndefReg; - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); - return UndefReg; - }; - for (unsigned I = 0; I < MatchInfo.size(); ++I) { - if (!MatchInfo[I]) - MatchInfo[I] = GetUndef(); - } - Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchCombineInsertVecElts( + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && + "Invalid opcode"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); + unsigned NumElts = DstTy.getNumElements(); + // If this MI is part of a sequence of insert_vec_elts, then + // don't do the combine in the middle of the sequence. + if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == + TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + MachineInstr *CurrInst = &MI; + MachineInstr *TmpInst; + int64_t IntImm; + Register TmpReg; + MatchInfo.resize(NumElts); + while (mi_match( + CurrInst->getOperand(0).getReg(), MRI, + m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { + if (IntImm >= NumElts) + return false; + if (!MatchInfo[IntImm]) + MatchInfo[IntImm] = TmpReg; + CurrInst = TmpInst; + } + // Variable index. + if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { + if (!MatchInfo[I - 1].isValid()) + MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); + } + return true; + } + // If we didn't end in a G_IMPLICIT_DEF, bail out. 
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; +} + +bool CombinerHelper::applyCombineInsertVecElts( + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + Builder.setInstr(MI); + Register UndefReg; + auto GetUndef = [&]() { + if (UndefReg) + return UndefReg; + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); + return UndefReg; + }; + for (unsigned I = 0; I < MatchInfo.size(); ++I) { + if (!MatchInfo[I]) + MatchInfo[I] = GetUndef(); + } + Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::applySimplifyAddToSub( MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { Builder.setInstr(MI); @@ -2762,812 +2762,812 @@ bool CombinerHelper::applySimplifyAddToSub( return true; } -bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { - // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... - // - // Creates the new hand + logic instruction (but does not insert them.) - // - // On success, MatchInfo is populated with the new instructions. These are - // inserted in applyHoistLogicOpWithSameOpcodeHands. - unsigned LogicOpcode = MI.getOpcode(); - assert(LogicOpcode == TargetOpcode::G_AND || - LogicOpcode == TargetOpcode::G_OR || - LogicOpcode == TargetOpcode::G_XOR); - MachineIRBuilder MIB(MI); - Register Dst = MI.getOperand(0).getReg(); - Register LHSReg = MI.getOperand(1).getReg(); - Register RHSReg = MI.getOperand(2).getReg(); - - // Don't recompute anything. - if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) - return false; - - // Make sure we have (hand x, ...), (hand y, ...) - MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); - MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); - if (!LeftHandInst || !RightHandInst) - return false; - unsigned HandOpcode = LeftHandInst->getOpcode(); - if (HandOpcode != RightHandInst->getOpcode()) - return false; - if (!LeftHandInst->getOperand(1).isReg() || - !RightHandInst->getOperand(1).isReg()) - return false; - - // Make sure the types match up, and if we're doing this post-legalization, - // we end up with legal types. - Register X = LeftHandInst->getOperand(1).getReg(); - Register Y = RightHandInst->getOperand(1).getReg(); - LLT XTy = MRI.getType(X); - LLT YTy = MRI.getType(Y); - if (XTy != YTy) - return false; - if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) - return false; - - // Optional extra source register. - Register ExtraHandOpSrcReg; - switch (HandOpcode) { - default: - return false; - case TargetOpcode::G_ANYEXT: - case TargetOpcode::G_SEXT: - case TargetOpcode::G_ZEXT: { - // Match: logic (ext X), (ext Y) --> ext (logic X, Y) - break; - } - case TargetOpcode::G_AND: - case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_SHL: { - // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z - MachineOperand &ZOp = LeftHandInst->getOperand(2); - if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) - return false; - ExtraHandOpSrcReg = ZOp.getReg(); - break; - } - } - - // Record the steps to build the new instructions. 
- // - // Steps to build (logic x, y) - auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); - OperandBuildSteps LogicBuildSteps = { - [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; - InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); - - // Steps to build hand (logic x, y), ...z - OperandBuildSteps HandBuildSteps = { - [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; - if (ExtraHandOpSrcReg.isValid()) - HandBuildSteps.push_back( - [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); - InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); - - MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); - return true; -} - -bool CombinerHelper::applyBuildInstructionSteps( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { - assert(MatchInfo.InstrsToBuild.size() && - "Expected at least one instr to build?"); - Builder.setInstr(MI); - for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { - assert(InstrToBuild.Opcode && "Expected a valid opcode?"); - assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); - MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); - for (auto &OperandFn : InstrToBuild.OperandFns) - OperandFn(Instr); - } - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchAshrShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR); - int64_t ShlCst, AshrCst; - Register Src; - // FIXME: detect splat constant vectors. - if (!mi_match(MI.getOperand(0).getReg(), MRI, - m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) - return false; - if (ShlCst != AshrCst) - return false; - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) - return false; - MatchInfo = std::make_tuple(Src, ShlCst); - return true; -} -bool CombinerHelper::applyAshShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR); - Register Src; - int64_t ShiftAmt; - std::tie(Src, ShiftAmt) = MatchInfo; - unsigned Size = MRI.getType(Src).getScalarSizeInBits(); - Builder.setInstrAndDebugLoc(MI); - Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, - Register &Replacement) { - // Given - // - // %y:_(sN) = G_SOMETHING - // %x:_(sN) = G_SOMETHING - // %res:_(sN) = G_AND %x, %y - // - // Eliminate the G_AND when it is known that x & y == x or x & y == y. - // - // Patterns like this can appear as a result of legalization. E.g. - // - // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y - // %one:_(s32) = G_CONSTANT i32 1 - // %and:_(s32) = G_AND %cmp, %one - // - // In this case, G_ICMP only produces a single bit, so x & 1 == x. - assert(MI.getOpcode() == TargetOpcode::G_AND); - if (!KB) - return false; - - Register AndDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(AndDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. - if (DstTy.isVector()) - return false; - - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - KnownBits LHSBits = KB->getKnownBits(LHS); - KnownBits RHSBits = KB->getKnownBits(RHS); - - // Check that x & Mask == x. 
- // x & 1 == x, always - // x & 0 == x, only if x is also 0 - // Meaning Mask has no effect if every bit is either one in Mask or zero in x. - // - // Check if we can replace AndDst with the LHS of the G_AND - if (canReplaceReg(AndDst, LHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { - Replacement = LHS; - return true; - } - - // Check if we can replace AndDst with the RHS of the G_AND - if (canReplaceReg(AndDst, RHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { - Replacement = RHS; - return true; - } - - return false; -} - -bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { - // Given - // - // %y:_(sN) = G_SOMETHING - // %x:_(sN) = G_SOMETHING - // %res:_(sN) = G_OR %x, %y - // - // Eliminate the G_OR when it is known that x | y == x or x | y == y. - assert(MI.getOpcode() == TargetOpcode::G_OR); - if (!KB) - return false; - - Register OrDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(OrDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. - if (DstTy.isVector()) - return false; - - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - KnownBits LHSBits = KB->getKnownBits(LHS); - KnownBits RHSBits = KB->getKnownBits(RHS); - - // Check that x | Mask == x. - // x | 0 == x, always - // x | 1 == x, only if x is also 1 - // Meaning Mask has no effect if every bit is either zero in Mask or one in x. - // - // Check if we can replace OrDst with the LHS of the G_OR - if (canReplaceReg(OrDst, LHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { - Replacement = LHS; - return true; - } - - // Check if we can replace OrDst with the RHS of the G_OR - if (canReplaceReg(OrDst, RHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { - Replacement = RHS; - return true; - } - - return false; -} - -bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { - // If the input is already sign extended, just drop the extension. - Register Src = MI.getOperand(1).getReg(); - unsigned ExtBits = MI.getOperand(2).getImm(); - unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); - return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); -} - -static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, - int64_t Cst, bool IsVector, bool IsFP) { - // For i1, Cst will always be -1 regardless of boolean contents. - return (ScalarSizeBits == 1 && Cst == -1) || - isConstTrueVal(TLI, Cst, IsVector, IsFP); -} - -bool CombinerHelper::matchNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { - assert(MI.getOpcode() == TargetOpcode::G_XOR); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); - Register XorSrc; - Register CstReg; - // We match xor(src, true) here. - if (!mi_match(MI.getOperand(0).getReg(), MRI, - m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) - return false; - - if (!MRI.hasOneNonDBGUse(XorSrc)) - return false; - - // Check that XorSrc is the root of a tree of comparisons combined with ANDs - // and ORs. The suffix of RegsToNegate starting from index I is used a work - // list of tree nodes to visit. - RegsToNegate.push_back(XorSrc); - // Remember whether the comparisons are all integer or all floating point. 
- bool IsInt = false; - bool IsFP = false; - for (unsigned I = 0; I < RegsToNegate.size(); ++I) { - Register Reg = RegsToNegate[I]; - if (!MRI.hasOneNonDBGUse(Reg)) - return false; - MachineInstr *Def = MRI.getVRegDef(Reg); - switch (Def->getOpcode()) { - default: - // Don't match if the tree contains anything other than ANDs, ORs and - // comparisons. - return false; - case TargetOpcode::G_ICMP: - if (IsFP) - return false; - IsInt = true; - // When we apply the combine we will invert the predicate. - break; - case TargetOpcode::G_FCMP: - if (IsInt) - return false; - IsFP = true; - // When we apply the combine we will invert the predicate. - break; - case TargetOpcode::G_AND: - case TargetOpcode::G_OR: - // Implement De Morgan's laws: - // ~(x & y) -> ~x | ~y - // ~(x | y) -> ~x & ~y - // When we apply the combine we will change the opcode and recursively - // negate the operands. - RegsToNegate.push_back(Def->getOperand(1).getReg()); - RegsToNegate.push_back(Def->getOperand(2).getReg()); - break; - } - } - - // Now we know whether the comparisons are integer or floating point, check - // the constant in the xor. - int64_t Cst; - if (Ty.isVector()) { - MachineInstr *CstDef = MRI.getVRegDef(CstReg); - auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); - if (!MaybeCst) - return false; - if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) - return false; - } else { - if (!mi_match(CstReg, MRI, m_ICst(Cst))) - return false; - if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) - return false; - } - - return true; -} - -bool CombinerHelper::applyNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { - for (Register Reg : RegsToNegate) { - MachineInstr *Def = MRI.getVRegDef(Reg); - Observer.changingInstr(*Def); - // For each comparison, invert the opcode. For each AND and OR, change the - // opcode. - switch (Def->getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_ICMP: - case TargetOpcode::G_FCMP: { - MachineOperand &PredOp = Def->getOperand(1); - CmpInst::Predicate NewP = CmpInst::getInversePredicate( - (CmpInst::Predicate)PredOp.getPredicate()); - PredOp.setPredicate(NewP); - break; - } - case TargetOpcode::G_AND: - Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); - break; - case TargetOpcode::G_OR: - Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); - break; - } - Observer.changedInstr(*Def); - } - - replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - // Match (xor (and x, y), y) (or any of its commuted cases) - assert(MI.getOpcode() == TargetOpcode::G_XOR); - Register &X = MatchInfo.first; - Register &Y = MatchInfo.second; - Register AndReg = MI.getOperand(1).getReg(); - Register SharedReg = MI.getOperand(2).getReg(); - - // Find a G_AND on either side of the G_XOR. - // Look for one of - // - // (xor (and x, y), SharedReg) - // (xor SharedReg, (and x, y)) - if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { - std::swap(AndReg, SharedReg); - if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) - return false; - } - - // Only do this if we'll eliminate the G_AND. - if (!MRI.hasOneNonDBGUse(AndReg)) - return false; - - // We can combine if SharedReg is the same as either the LHS or RHS of the - // G_AND. 
- if (Y != SharedReg) - std::swap(X, Y); - return Y == SharedReg; -} - -bool CombinerHelper::applyXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - // Fold (xor (and x, y), y) -> (and (not x), y) - Builder.setInstrAndDebugLoc(MI); - Register X, Y; - std::tie(X, Y) = MatchInfo; - auto Not = Builder.buildNot(MRI.getType(X), X); - Observer.changingInstr(MI); - MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); - MI.getOperand(1).setReg(Not->getOperand(0).getReg()); - MI.getOperand(2).setReg(Y); - Observer.changedInstr(MI); - return true; -} - -bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(DstReg); - const DataLayout &DL = Builder.getMF().getDataLayout(); - - if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) - return false; - - if (Ty.isPointer()) { - auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); - return ConstVal && *ConstVal == 0; - } - - assert(Ty.isVector() && "Expecting a vector type"); - const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); - return isBuildVectorAllZeros(*VecMI, MRI); -} - -bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); - Builder.setInstrAndDebugLoc(MI); - Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); - MI.eraseFromParent(); - return true; -} - -/// The second source operand is known to be a power of 2. -bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Pow2Src1 = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(DstReg); - Builder.setInstrAndDebugLoc(MI); - - // Fold (urem x, pow2) -> (and x, pow2-1) - auto NegOne = Builder.buildConstant(Ty, -1); - auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); - Builder.buildAnd(DstReg, Src0, Add); - MI.eraseFromParent(); - return true; -} - -Optional<SmallVector<Register, 8>> -CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { - assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); - // We want to detect if Root is part of a tree which represents a bunch - // of loads being merged into a larger load. We'll try to recognize patterns - // like, for example: - // - // Reg Reg - // \ / - // OR_1 Reg - // \ / - // OR_2 - // \ Reg - // .. / - // Root - // - // Reg Reg Reg Reg - // \ / \ / - // OR_1 OR_2 - // \ / - // \ / - // ... - // Root - // - // Each "Reg" may have been produced by a load + some arithmetic. This - // function will save each of them. - SmallVector<Register, 8> RegsToVisit; - SmallVector<const MachineInstr *, 7> Ors = {Root}; - - // In the "worst" case, we're dealing with a load for each byte. So, there - // are at most #bytes - 1 ORs. - const unsigned MaxIter = - MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; - for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { - if (Ors.empty()) - break; - const MachineInstr *Curr = Ors.pop_back_val(); - Register OrLHS = Curr->getOperand(1).getReg(); - Register OrRHS = Curr->getOperand(2).getReg(); - - // In the combine, we want to elimate the entire tree. - if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) - return None; - - // If it's a G_OR, save it and continue to walk. If it's not, then it's - // something that may be a load + arithmetic. 
- if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) - Ors.push_back(Or); - else - RegsToVisit.push_back(OrLHS); - if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) - Ors.push_back(Or); - else - RegsToVisit.push_back(OrRHS); - } - - // We're going to try and merge each register into a wider power-of-2 type, - // so we ought to have an even number of registers. - if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) - return None; - return RegsToVisit; -} - -/// Helper function for findLoadOffsetsForLoadOrCombine. -/// -/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, -/// and then moving that value into a specific byte offset. -/// -/// e.g. x[i] << 24 -/// -/// \returns The load instruction and the byte offset it is moved into. -static Optional<std::pair<MachineInstr *, int64_t>> -matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, - const MachineRegisterInfo &MRI) { - assert(MRI.hasOneNonDBGUse(Reg) && - "Expected Reg to only have one non-debug use?"); - Register MaybeLoad; - int64_t Shift; - if (!mi_match(Reg, MRI, - m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { - Shift = 0; - MaybeLoad = Reg; - } - - if (Shift % MemSizeInBits != 0) - return None; - - // TODO: Handle other types of loads. - auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); - if (!Load) - return None; - - const auto &MMO = **Load->memoperands_begin(); - if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) - return None; - - return std::make_pair(Load, Shift / MemSizeInBits); -} - -Optional<std::pair<MachineInstr *, int64_t>> -CombinerHelper::findLoadOffsetsForLoadOrCombine( - SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, - const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { - - // Each load found for the pattern. There should be one for each RegsToVisit. - SmallSetVector<const MachineInstr *, 8> Loads; - - // The lowest index used in any load. (The lowest "i" for each x[i].) - int64_t LowestIdx = INT64_MAX; - - // The load which uses the lowest index. - MachineInstr *LowestIdxLoad = nullptr; - - // Keeps track of the load indices we see. We shouldn't see any indices twice. - SmallSet<int64_t, 8> SeenIdx; - - // Ensure each load is in the same MBB. - // TODO: Support multiple MachineBasicBlocks. - MachineBasicBlock *MBB = nullptr; - const MachineMemOperand *MMO = nullptr; - - // Earliest instruction-order load in the pattern. - MachineInstr *EarliestLoad = nullptr; - - // Latest instruction-order load in the pattern. - MachineInstr *LatestLoad = nullptr; - - // Base pointer which every load should share. - Register BasePtr; - - // We want to find a load for each register. Each load should have some - // appropriate bit twiddling arithmetic. During this loop, we will also keep - // track of the load which uses the lowest index. Later, we will check if we - // can use its pointer in the final, combined load. - for (auto Reg : RegsToVisit) { - // Find the load, and find the position that it will end up in (e.g. a - // shifted) value. - auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); - if (!LoadAndPos) - return None; - MachineInstr *Load; - int64_t DstPos; - std::tie(Load, DstPos) = *LoadAndPos; - - // TODO: Handle multiple MachineBasicBlocks. Currently not handled because - // it is difficult to check for stores/calls/etc between loads. 
- MachineBasicBlock *LoadMBB = Load->getParent(); - if (!MBB) - MBB = LoadMBB; - if (LoadMBB != MBB) - return None; - - // Make sure that the MachineMemOperands of every seen load are compatible. - const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); - if (!MMO) - MMO = LoadMMO; - if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) - return None; - - // Find out what the base pointer and index for the load is. - Register LoadPtr; - int64_t Idx; - if (!mi_match(Load->getOperand(1).getReg(), MRI, - m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { - LoadPtr = Load->getOperand(1).getReg(); - Idx = 0; - } - - // Don't combine things like a[i], a[i] -> a bigger load. - if (!SeenIdx.insert(Idx).second) - return None; - - // Every load must share the same base pointer; don't combine things like: - // - // a[i], b[i + 1] -> a bigger load. - if (!BasePtr.isValid()) - BasePtr = LoadPtr; - if (BasePtr != LoadPtr) - return None; - - if (Idx < LowestIdx) { - LowestIdx = Idx; - LowestIdxLoad = Load; - } - - // Keep track of the byte offset that this load ends up at. If we have seen - // the byte offset, then stop here. We do not want to combine: - // - // a[i] << 16, a[i + k] << 16 -> a bigger load. - if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) - return None; - Loads.insert(Load); - - // Keep track of the position of the earliest/latest loads in the pattern. - // We will check that there are no load fold barriers between them later - // on. - // - // FIXME: Is there a better way to check for load fold barriers? - if (!EarliestLoad || dominates(*Load, *EarliestLoad)) - EarliestLoad = Load; - if (!LatestLoad || dominates(*LatestLoad, *Load)) - LatestLoad = Load; - } - - // We found a load for each register. Let's check if each load satisfies the - // pattern. - assert(Loads.size() == RegsToVisit.size() && - "Expected to find a load for each register?"); - assert(EarliestLoad != LatestLoad && EarliestLoad && - LatestLoad && "Expected at least two loads?"); - - // Check if there are any stores, calls, etc. between any of the loads. If - // there are, then we can't safely perform the combine. - // - // MaxIter is chosen based off the (worst case) number of iterations it - // typically takes to succeed in the LLVM test suite plus some padding. - // - // FIXME: Is there a better way to check for load fold barriers? - const unsigned MaxIter = 20; - unsigned Iter = 0; - for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), - LatestLoad->getIterator())) { - if (Loads.count(&MI)) - continue; - if (MI.isLoadFoldBarrier()) - return None; - if (Iter++ == MaxIter) - return None; - } - - return std::make_pair(LowestIdxLoad, LowestIdx); -} - -bool CombinerHelper::matchLoadOrCombine( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_OR); - MachineFunction &MF = *MI.getMF(); - // Assuming a little-endian target, transform: - // s8 *a = ... - // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) - // => - // s32 val = *((i32)a) - // - // s8 *a = ... - // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] - // => - // s32 val = BSWAP(*((s32)a)) - Register Dst = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Dst); - if (Ty.isVector()) - return false; - - // We need to combine at least two loads into this type. Since the smallest - // possible load is into a byte, we need at least a 16-bit wide type. 
- const unsigned WideMemSizeInBits = Ty.getSizeInBits(); - if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) - return false; - - // Match a collection of non-OR instructions in the pattern. - auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); - if (!RegsToVisit) - return false; - - // We have a collection of non-OR instructions. Figure out how wide each of - // the small loads should be based off of the number of potential loads we - // found. - const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); - if (NarrowMemSizeInBits % 8 != 0) - return false; - - // Check if each register feeding into each OR is a load from the same - // base pointer + some arithmetic. - // - // e.g. a[0], a[1] << 8, a[2] << 16, etc. - // - // Also verify that each of these ends up putting a[i] into the same memory - // offset as a load into a wide type would. - SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; - MachineInstr *LowestIdxLoad; - int64_t LowestIdx; - auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( - MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); - if (!MaybeLoadInfo) - return false; - std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; - - // We have a bunch of loads being OR'd together. Using the addresses + offsets - // we found before, check if this corresponds to a big or little endian byte - // pattern. If it does, then we can represent it using a load + possibly a - // BSWAP. - bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); - Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); - if (!IsBigEndian.hasValue()) - return false; - bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; - if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) - return false; - - // Make sure that the load from the lowest index produces offset 0 in the - // final value. - // - // This ensures that we won't combine something like this: - // - // load x[i] -> byte 2 - // load x[i+1] -> byte 0 ---> wide_load x[i] - // load x[i+2] -> byte 1 - const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; - const unsigned ZeroByteOffset = - *IsBigEndian - ? bigEndianByteAt(NumLoadsInTy, 0) - : littleEndianByteAt(NumLoadsInTy, 0); - auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); - if (ZeroOffsetIdx == MemOffset2Idx.end() || - ZeroOffsetIdx->second != LowestIdx) - return false; - - // We wil reuse the pointer from the load which ends up at byte offset 0. It - // may not use index 0. - Register Ptr = LowestIdxLoad->getOperand(1).getReg(); - const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); - LegalityQuery::MemDesc MMDesc; - MMDesc.SizeInBits = WideMemSizeInBits; - MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getOrdering(); - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) - return false; - auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); - - // Load must be allowed and fast on the target. - LLVMContext &C = MF.getFunction().getContext(); - auto &DL = MF.getDataLayout(); - bool Fast = false; - if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || - !Fast) - return false; - - MatchInfo = [=](MachineIRBuilder &MIB) { - Register LoadDst = NeedsBSwap ? 
MRI.cloneVirtualRegister(Dst) : Dst; - MIB.buildLoad(LoadDst, Ptr, *NewMMO); - if (NeedsBSwap) - MIB.buildBSwap(Dst, LoadDst); - }; - return true; -} - -bool CombinerHelper::applyLoadOrCombine( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - Builder.setInstrAndDebugLoc(MI); - MatchInfo(Builder); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... + // + // Creates the new hand + logic instruction (but does not insert them.) + // + // On success, MatchInfo is populated with the new instructions. These are + // inserted in applyHoistLogicOpWithSameOpcodeHands. + unsigned LogicOpcode = MI.getOpcode(); + assert(LogicOpcode == TargetOpcode::G_AND || + LogicOpcode == TargetOpcode::G_OR || + LogicOpcode == TargetOpcode::G_XOR); + MachineIRBuilder MIB(MI); + Register Dst = MI.getOperand(0).getReg(); + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + // Don't recompute anything. + if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) + return false; + + // Make sure we have (hand x, ...), (hand y, ...) + MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); + MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); + if (!LeftHandInst || !RightHandInst) + return false; + unsigned HandOpcode = LeftHandInst->getOpcode(); + if (HandOpcode != RightHandInst->getOpcode()) + return false; + if (!LeftHandInst->getOperand(1).isReg() || + !RightHandInst->getOperand(1).isReg()) + return false; + + // Make sure the types match up, and if we're doing this post-legalization, + // we end up with legal types. + Register X = LeftHandInst->getOperand(1).getReg(); + Register Y = RightHandInst->getOperand(1).getReg(); + LLT XTy = MRI.getType(X); + LLT YTy = MRI.getType(Y); + if (XTy != YTy) + return false; + if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) + return false; + + // Optional extra source register. + Register ExtraHandOpSrcReg; + switch (HandOpcode) { + default: + return false; + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: { + // Match: logic (ext X), (ext Y) --> ext (logic X, Y) + break; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z + MachineOperand &ZOp = LeftHandInst->getOperand(2); + if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) + return false; + ExtraHandOpSrcReg = ZOp.getReg(); + break; + } + } + + // Record the steps to build the new instructions. 
+ // + // Steps to build (logic x, y) + auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); + OperandBuildSteps LogicBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; + InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); + + // Steps to build hand (logic x, y), ...z + OperandBuildSteps HandBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; + if (ExtraHandOpSrcReg.isValid()) + HandBuildSteps.push_back( + [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); + InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); + + MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); + return true; +} + +bool CombinerHelper::applyBuildInstructionSteps( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + assert(MatchInfo.InstrsToBuild.size() && + "Expected at least one instr to build?"); + Builder.setInstr(MI); + for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { + assert(InstrToBuild.Opcode && "Expected a valid opcode?"); + assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); + MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); + for (auto &OperandFn : InstrToBuild.OperandFns) + OperandFn(Instr); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchAshrShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + int64_t ShlCst, AshrCst; + Register Src; + // FIXME: detect splat constant vectors. + if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) + return false; + if (ShlCst != AshrCst) + return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) + return false; + MatchInfo = std::make_tuple(Src, ShlCst); + return true; +} +bool CombinerHelper::applyAshShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + Register Src; + int64_t ShiftAmt; + std::tie(Src, ShiftAmt) = MatchInfo; + unsigned Size = MRI.getType(Src).getScalarSizeInBits(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, + Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_AND %x, %y + // + // Eliminate the G_AND when it is known that x & y == x or x & y == y. + // + // Patterns like this can appear as a result of legalization. E.g. + // + // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y + // %one:_(s32) = G_CONSTANT i32 1 + // %and:_(s32) = G_AND %cmp, %one + // + // In this case, G_ICMP only produces a single bit, so x & 1 == x. + assert(MI.getOpcode() == TargetOpcode::G_AND); + if (!KB) + return false; + + Register AndDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(AndDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x & Mask == x. 
+ // x & 1 == x, always + // x & 0 == x, only if x is also 0 + // Meaning Mask has no effect if every bit is either one in Mask or zero in x. + // + // Check if we can replace AndDst with the LHS of the G_AND + if (canReplaceReg(AndDst, LHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace AndDst with the RHS of the G_AND + if (canReplaceReg(AndDst, RHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_OR %x, %y + // + // Eliminate the G_OR when it is known that x | y == x or x | y == y. + assert(MI.getOpcode() == TargetOpcode::G_OR); + if (!KB) + return false; + + Register OrDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(OrDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x | Mask == x. + // x | 0 == x, always + // x | 1 == x, only if x is also 1 + // Meaning Mask has no effect if every bit is either zero in Mask or one in x. + // + // Check if we can replace OrDst with the LHS of the G_OR + if (canReplaceReg(OrDst, LHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace OrDst with the RHS of the G_OR + if (canReplaceReg(OrDst, RHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { + // If the input is already sign extended, just drop the extension. + Register Src = MI.getOperand(1).getReg(); + unsigned ExtBits = MI.getOperand(2).getImm(); + unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); + return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); +} + +static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, + int64_t Cst, bool IsVector, bool IsFP) { + // For i1, Cst will always be -1 regardless of boolean contents. + return (ScalarSizeBits == 1 && Cst == -1) || + isConstTrueVal(TLI, Cst, IsVector, IsFP); +} + +bool CombinerHelper::matchNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + assert(MI.getOpcode() == TargetOpcode::G_XOR); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); + Register XorSrc; + Register CstReg; + // We match xor(src, true) here. + if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) + return false; + + if (!MRI.hasOneNonDBGUse(XorSrc)) + return false; + + // Check that XorSrc is the root of a tree of comparisons combined with ANDs + // and ORs. The suffix of RegsToNegate starting from index I is used a work + // list of tree nodes to visit. + RegsToNegate.push_back(XorSrc); + // Remember whether the comparisons are all integer or all floating point. 
+ bool IsInt = false; + bool IsFP = false; + for (unsigned I = 0; I < RegsToNegate.size(); ++I) { + Register Reg = RegsToNegate[I]; + if (!MRI.hasOneNonDBGUse(Reg)) + return false; + MachineInstr *Def = MRI.getVRegDef(Reg); + switch (Def->getOpcode()) { + default: + // Don't match if the tree contains anything other than ANDs, ORs and + // comparisons. + return false; + case TargetOpcode::G_ICMP: + if (IsFP) + return false; + IsInt = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_FCMP: + if (IsInt) + return false; + IsFP = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + // Implement De Morgan's laws: + // ~(x & y) -> ~x | ~y + // ~(x | y) -> ~x & ~y + // When we apply the combine we will change the opcode and recursively + // negate the operands. + RegsToNegate.push_back(Def->getOperand(1).getReg()); + RegsToNegate.push_back(Def->getOperand(2).getReg()); + break; + } + } + + // Now we know whether the comparisons are integer or floating point, check + // the constant in the xor. + int64_t Cst; + if (Ty.isVector()) { + MachineInstr *CstDef = MRI.getVRegDef(CstReg); + auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); + if (!MaybeCst) + return false; + if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) + return false; + } else { + if (!mi_match(CstReg, MRI, m_ICst(Cst))) + return false; + if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) + return false; + } + + return true; +} + +bool CombinerHelper::applyNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + for (Register Reg : RegsToNegate) { + MachineInstr *Def = MRI.getVRegDef(Reg); + Observer.changingInstr(*Def); + // For each comparison, invert the opcode. For each AND and OR, change the + // opcode. + switch (Def->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + MachineOperand &PredOp = Def->getOperand(1); + CmpInst::Predicate NewP = CmpInst::getInversePredicate( + (CmpInst::Predicate)PredOp.getPredicate()); + PredOp.setPredicate(NewP); + break; + } + case TargetOpcode::G_AND: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); + break; + case TargetOpcode::G_OR: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + break; + } + Observer.changedInstr(*Def); + } + + replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Match (xor (and x, y), y) (or any of its commuted cases) + assert(MI.getOpcode() == TargetOpcode::G_XOR); + Register &X = MatchInfo.first; + Register &Y = MatchInfo.second; + Register AndReg = MI.getOperand(1).getReg(); + Register SharedReg = MI.getOperand(2).getReg(); + + // Find a G_AND on either side of the G_XOR. + // Look for one of + // + // (xor (and x, y), SharedReg) + // (xor SharedReg, (and x, y)) + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { + std::swap(AndReg, SharedReg); + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) + return false; + } + + // Only do this if we'll eliminate the G_AND. + if (!MRI.hasOneNonDBGUse(AndReg)) + return false; + + // We can combine if SharedReg is the same as either the LHS or RHS of the + // G_AND. 
+ if (Y != SharedReg) + std::swap(X, Y); + return Y == SharedReg; +} + +bool CombinerHelper::applyXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Fold (xor (and x, y), y) -> (and (not x), y) + Builder.setInstrAndDebugLoc(MI); + Register X, Y; + std::tie(X, Y) = MatchInfo; + auto Not = Builder.buildNot(MRI.getType(X), X); + Observer.changingInstr(MI); + MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + MI.getOperand(1).setReg(Not->getOperand(0).getReg()); + MI.getOperand(2).setReg(Y); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + const DataLayout &DL = Builder.getMF().getDataLayout(); + + if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) + return false; + + if (Ty.isPointer()) { + auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); + return ConstVal && *ConstVal == 0; + } + + assert(Ty.isVector() && "Expecting a vector type"); + const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + return isBuildVectorAllZeros(*VecMI, MRI); +} + +bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); + Builder.setInstrAndDebugLoc(MI); + Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); + MI.eraseFromParent(); + return true; +} + +/// The second source operand is known to be a power of 2. +bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Pow2Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(DstReg); + Builder.setInstrAndDebugLoc(MI); + + // Fold (urem x, pow2) -> (and x, pow2-1) + auto NegOne = Builder.buildConstant(Ty, -1); + auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); + Builder.buildAnd(DstReg, Src0, Add); + MI.eraseFromParent(); + return true; +} + +Optional<SmallVector<Register, 8>> +CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { + assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); + // We want to detect if Root is part of a tree which represents a bunch + // of loads being merged into a larger load. We'll try to recognize patterns + // like, for example: + // + // Reg Reg + // \ / + // OR_1 Reg + // \ / + // OR_2 + // \ Reg + // .. / + // Root + // + // Reg Reg Reg Reg + // \ / \ / + // OR_1 OR_2 + // \ / + // \ / + // ... + // Root + // + // Each "Reg" may have been produced by a load + some arithmetic. This + // function will save each of them. + SmallVector<Register, 8> RegsToVisit; + SmallVector<const MachineInstr *, 7> Ors = {Root}; + + // In the "worst" case, we're dealing with a load for each byte. So, there + // are at most #bytes - 1 ORs. + const unsigned MaxIter = + MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; + for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { + if (Ors.empty()) + break; + const MachineInstr *Curr = Ors.pop_back_val(); + Register OrLHS = Curr->getOperand(1).getReg(); + Register OrRHS = Curr->getOperand(2).getReg(); + + // In the combine, we want to elimate the entire tree. + if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) + return None; + + // If it's a G_OR, save it and continue to walk. If it's not, then it's + // something that may be a load + arithmetic. 
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrLHS); + if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrRHS); + } + + // We're going to try and merge each register into a wider power-of-2 type, + // so we ought to have an even number of registers. + if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) + return None; + return RegsToVisit; +} + +/// Helper function for findLoadOffsetsForLoadOrCombine. +/// +/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, +/// and then moving that value into a specific byte offset. +/// +/// e.g. x[i] << 24 +/// +/// \returns The load instruction and the byte offset it is moved into. +static Optional<std::pair<MachineInstr *, int64_t>> +matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, + const MachineRegisterInfo &MRI) { + assert(MRI.hasOneNonDBGUse(Reg) && + "Expected Reg to only have one non-debug use?"); + Register MaybeLoad; + int64_t Shift; + if (!mi_match(Reg, MRI, + m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { + Shift = 0; + MaybeLoad = Reg; + } + + if (Shift % MemSizeInBits != 0) + return None; + + // TODO: Handle other types of loads. + auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); + if (!Load) + return None; + + const auto &MMO = **Load->memoperands_begin(); + if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) + return None; + + return std::make_pair(Load, Shift / MemSizeInBits); +} + +Optional<std::pair<MachineInstr *, int64_t>> +CombinerHelper::findLoadOffsetsForLoadOrCombine( + SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { + + // Each load found for the pattern. There should be one for each RegsToVisit. + SmallSetVector<const MachineInstr *, 8> Loads; + + // The lowest index used in any load. (The lowest "i" for each x[i].) + int64_t LowestIdx = INT64_MAX; + + // The load which uses the lowest index. + MachineInstr *LowestIdxLoad = nullptr; + + // Keeps track of the load indices we see. We shouldn't see any indices twice. + SmallSet<int64_t, 8> SeenIdx; + + // Ensure each load is in the same MBB. + // TODO: Support multiple MachineBasicBlocks. + MachineBasicBlock *MBB = nullptr; + const MachineMemOperand *MMO = nullptr; + + // Earliest instruction-order load in the pattern. + MachineInstr *EarliestLoad = nullptr; + + // Latest instruction-order load in the pattern. + MachineInstr *LatestLoad = nullptr; + + // Base pointer which every load should share. + Register BasePtr; + + // We want to find a load for each register. Each load should have some + // appropriate bit twiddling arithmetic. During this loop, we will also keep + // track of the load which uses the lowest index. Later, we will check if we + // can use its pointer in the final, combined load. + for (auto Reg : RegsToVisit) { + // Find the load, and find the position that it will end up in (e.g. a + // shifted) value. + auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); + if (!LoadAndPos) + return None; + MachineInstr *Load; + int64_t DstPos; + std::tie(Load, DstPos) = *LoadAndPos; + + // TODO: Handle multiple MachineBasicBlocks. Currently not handled because + // it is difficult to check for stores/calls/etc between loads. 
+ MachineBasicBlock *LoadMBB = Load->getParent(); + if (!MBB) + MBB = LoadMBB; + if (LoadMBB != MBB) + return None; + + // Make sure that the MachineMemOperands of every seen load are compatible. + const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); + if (!MMO) + MMO = LoadMMO; + if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) + return None; + + // Find out what the base pointer and index for the load is. + Register LoadPtr; + int64_t Idx; + if (!mi_match(Load->getOperand(1).getReg(), MRI, + m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { + LoadPtr = Load->getOperand(1).getReg(); + Idx = 0; + } + + // Don't combine things like a[i], a[i] -> a bigger load. + if (!SeenIdx.insert(Idx).second) + return None; + + // Every load must share the same base pointer; don't combine things like: + // + // a[i], b[i + 1] -> a bigger load. + if (!BasePtr.isValid()) + BasePtr = LoadPtr; + if (BasePtr != LoadPtr) + return None; + + if (Idx < LowestIdx) { + LowestIdx = Idx; + LowestIdxLoad = Load; + } + + // Keep track of the byte offset that this load ends up at. If we have seen + // the byte offset, then stop here. We do not want to combine: + // + // a[i] << 16, a[i + k] << 16 -> a bigger load. + if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) + return None; + Loads.insert(Load); + + // Keep track of the position of the earliest/latest loads in the pattern. + // We will check that there are no load fold barriers between them later + // on. + // + // FIXME: Is there a better way to check for load fold barriers? + if (!EarliestLoad || dominates(*Load, *EarliestLoad)) + EarliestLoad = Load; + if (!LatestLoad || dominates(*LatestLoad, *Load)) + LatestLoad = Load; + } + + // We found a load for each register. Let's check if each load satisfies the + // pattern. + assert(Loads.size() == RegsToVisit.size() && + "Expected to find a load for each register?"); + assert(EarliestLoad != LatestLoad && EarliestLoad && + LatestLoad && "Expected at least two loads?"); + + // Check if there are any stores, calls, etc. between any of the loads. If + // there are, then we can't safely perform the combine. + // + // MaxIter is chosen based off the (worst case) number of iterations it + // typically takes to succeed in the LLVM test suite plus some padding. + // + // FIXME: Is there a better way to check for load fold barriers? + const unsigned MaxIter = 20; + unsigned Iter = 0; + for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), + LatestLoad->getIterator())) { + if (Loads.count(&MI)) + continue; + if (MI.isLoadFoldBarrier()) + return None; + if (Iter++ == MaxIter) + return None; + } + + return std::make_pair(LowestIdxLoad, LowestIdx); +} + +bool CombinerHelper::matchLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + MachineFunction &MF = *MI.getMF(); + // Assuming a little-endian target, transform: + // s8 *a = ... + // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) + // => + // s32 val = *((i32)a) + // + // s8 *a = ... + // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] + // => + // s32 val = BSWAP(*((s32)a)) + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (Ty.isVector()) + return false; + + // We need to combine at least two loads into this type. Since the smallest + // possible load is into a byte, we need at least a 16-bit wide type. 
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits(); + if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) + return false; + + // Match a collection of non-OR instructions in the pattern. + auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); + if (!RegsToVisit) + return false; + + // We have a collection of non-OR instructions. Figure out how wide each of + // the small loads should be based off of the number of potential loads we + // found. + const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); + if (NarrowMemSizeInBits % 8 != 0) + return false; + + // Check if each register feeding into each OR is a load from the same + // base pointer + some arithmetic. + // + // e.g. a[0], a[1] << 8, a[2] << 16, etc. + // + // Also verify that each of these ends up putting a[i] into the same memory + // offset as a load into a wide type would. + SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; + MachineInstr *LowestIdxLoad; + int64_t LowestIdx; + auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( + MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); + if (!MaybeLoadInfo) + return false; + std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; + + // We have a bunch of loads being OR'd together. Using the addresses + offsets + // we found before, check if this corresponds to a big or little endian byte + // pattern. If it does, then we can represent it using a load + possibly a + // BSWAP. + bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); + Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); + if (!IsBigEndian.hasValue()) + return false; + bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; + if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) + return false; + + // Make sure that the load from the lowest index produces offset 0 in the + // final value. + // + // This ensures that we won't combine something like this: + // + // load x[i] -> byte 2 + // load x[i+1] -> byte 0 ---> wide_load x[i] + // load x[i+2] -> byte 1 + const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; + const unsigned ZeroByteOffset = + *IsBigEndian + ? bigEndianByteAt(NumLoadsInTy, 0) + : littleEndianByteAt(NumLoadsInTy, 0); + auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); + if (ZeroOffsetIdx == MemOffset2Idx.end() || + ZeroOffsetIdx->second != LowestIdx) + return false; + + // We wil reuse the pointer from the load which ends up at byte offset 0. It + // may not use index 0. + Register Ptr = LowestIdxLoad->getOperand(1).getReg(); + const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); + LegalityQuery::MemDesc MMDesc; + MMDesc.SizeInBits = WideMemSizeInBits; + MMDesc.AlignInBits = MMO.getAlign().value() * 8; + MMDesc.Ordering = MMO.getOrdering(); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) + return false; + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); + + // Load must be allowed and fast on the target. + LLVMContext &C = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + bool Fast = false; + if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || + !Fast) + return false; + + MatchInfo = [=](MachineIRBuilder &MIB) { + Register LoadDst = NeedsBSwap ? 
MRI.cloneVirtualRegister(Dst) : Dst; + MIB.buildLoad(LoadDst, Ptr, *NewMMO); + if (NeedsBSwap) + MIB.buildBSwap(Dst, LoadDst); + }; + return true; +} + +bool CombinerHelper::applyLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + MatchInfo(Builder); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp index 59f4d60a41..6bc72e4aa9 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp @@ -16,7 +16,7 @@ using namespace llvm; void GISelChangeObserver::changingAllUsesOfReg( - const MachineRegisterInfo &MRI, Register Reg) { + const MachineRegisterInfo &MRI, Register Reg) { for (auto &ChangingMI : MRI.use_instructions(Reg)) { changingInstr(ChangingMI); ChangingAllUsesOfReg.insert(&ChangingMI); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 2de20489e1..e38ede1b67 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -94,25 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { << "\n"; } -/// Compute known bits for the intersection of \p Src0 and \p Src1 -void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, - KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth) { - // Test src1 first, since we canonicalize simpler expressions to the RHS. - computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); - - // If we don't know any bits, early out. - if (Known.isUnknown()) - return; - - KnownBits Known2; - computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); - - // Only known if known in both the LHS and RHS. - Known = KnownBits::commonBits(Known, Known2); -} - +/// Compute known bits for the intersection of \p Src0 and \p Src1 +void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, + KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. + computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); + + // If we don't know any bits, early out. + if (Known.isUnknown()) + return; + + KnownBits Known2; + computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -200,7 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, Depth + (Opcode != TargetOpcode::COPY)); - Known = KnownBits::commonBits(Known, Known2); + Known = KnownBits::commonBits(Known, Known2); // If we reach a point where we don't know anything // just stop looking through the operands. 
if (Known.One == 0 && Known.Zero == 0) @@ -217,7 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, auto CstVal = getConstantVRegVal(R, MRI); if (!CstVal) break; - Known = KnownBits::makeConstant(*CstVal); + Known = KnownBits::makeConstant(*CstVal); break; } case TargetOpcode::G_FRAME_INDEX: { @@ -284,52 +284,52 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - Known = KnownBits::computeForMul(Known, Known2); + Known = KnownBits::computeForMul(Known, Known2); break; } case TargetOpcode::G_SELECT: { - computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), - Known, DemandedElts, Depth + 1); - break; - } - case TargetOpcode::G_SMIN: { - // TODO: Handle clamp pattern with number of sign bits - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), + Known, DemandedElts, Depth + 1); + break; + } + case TargetOpcode::G_SMIN: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - Known = KnownBits::smin(Known, KnownRHS); - break; - } - case TargetOpcode::G_SMAX: { - // TODO: Handle clamp pattern with number of sign bits - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, - Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, - Depth + 1); - Known = KnownBits::smax(Known, KnownRHS); - break; - } - case TargetOpcode::G_UMIN: { - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, - DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, - DemandedElts, Depth + 1); - Known = KnownBits::umin(Known, KnownRHS); - break; - } - case TargetOpcode::G_UMAX: { - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, - DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, - DemandedElts, Depth + 1); - Known = KnownBits::umax(Known, KnownRHS); + Known = KnownBits::smin(Known, KnownRHS); break; } + case TargetOpcode::G_SMAX: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smax(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMIN: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umin(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMAX: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umax(Known, KnownRHS); + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { if (TL.getBooleanContents(DstTy.isVector(), @@ -347,58 +347,58 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known = 
Known.sext(BitWidth); break; } - case TargetOpcode::G_SEXT_INREG: { - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, - Depth + 1); - Known = Known.sextInReg(MI.getOperand(2).getImm()); - break; - } + case TargetOpcode::G_SEXT_INREG: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.sextInReg(MI.getOperand(2).getImm()); + break; + } case TargetOpcode::G_ANYEXT: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - Known = Known.anyext(BitWidth); + Known = Known.anyext(BitWidth); break; } case TargetOpcode::G_LOAD: { - const MachineMemOperand *MMO = *MI.memoperands_begin(); - if (const MDNode *Ranges = MMO->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, Known); + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); } - + break; } case TargetOpcode::G_ZEXTLOAD: { // Everything above the retrieved bits is zero - Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); break; } - case TargetOpcode::G_ASHR: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, - Depth + 1); + case TargetOpcode::G_ASHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, Depth + 1); - Known = KnownBits::ashr(LHSKnown, RHSKnown); - break; - } - case TargetOpcode::G_LSHR: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, - Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, - Depth + 1); - Known = KnownBits::lshr(LHSKnown, RHSKnown); - break; - } - case TargetOpcode::G_SHL: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, - Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Known = KnownBits::ashr(LHSKnown, RHSKnown); + break; + } + case TargetOpcode::G_LSHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, Depth + 1); - Known = KnownBits::shl(LHSKnown, RHSKnown); + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + Known = KnownBits::lshr(LHSKnown, RHSKnown); break; } + case TargetOpcode::G_SHL: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + Known = KnownBits::shl(LHSKnown, RHSKnown); + break; + } case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_PTRTOINT: // Fall through and handle them the same as zext/trunc. 
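(Editorial annotation, not part of the diff above.) A minimal standalone sketch of the
constant-shift special case of the known-bits propagation used in the G_SHL handler; it
assumes the llvm::KnownBits API (public Zero/One APInt members) and a shift amount C with
0 <= C < bit width. KnownBits::shl itself handles a fully general KnownBits shift amount.

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// For a left shift by a known constant C, the known-zero and known-one bits of
// the operand move up by C positions, and the C freshly shifted-in low bits are
// always known to be zero.
static KnownBits shlByConstant(const KnownBits &LHS, unsigned C) {
  KnownBits Res(LHS.getBitWidth());
  Res.Zero = LHS.Zero.shl(C);
  Res.One = LHS.One.shl(C);
  Res.Zero.setLowBits(C);
  return Res;
}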
@@ -418,50 +418,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(SrcBitWidth); break; } - case TargetOpcode::G_MERGE_VALUES: { - unsigned NumOps = MI.getNumOperands(); - unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - - for (unsigned I = 0; I != NumOps - 1; ++I) { - KnownBits SrcOpKnown; - computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, - DemandedElts, Depth + 1); - Known.insertBits(SrcOpKnown, I * OpSize); - } - break; - } - case TargetOpcode::G_UNMERGE_VALUES: { - unsigned NumOps = MI.getNumOperands(); - Register SrcReg = MI.getOperand(NumOps - 1).getReg(); - if (MRI.getType(SrcReg).isVector()) - return; // TODO: Handle vectors. - - KnownBits SrcOpKnown; - computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1); - - // Figure out the result operand index - unsigned DstIdx = 0; - for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; - ++DstIdx) - ; - - Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); - break; - } - case TargetOpcode::G_BSWAP: { - Register SrcReg = MI.getOperand(1).getReg(); - computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.byteSwap(); - break; - } - case TargetOpcode::G_BITREVERSE: { - Register SrcReg = MI.getOperand(1).getReg(); - computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.reverseBits(); - break; - } - } - + case TargetOpcode::G_MERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + + for (unsigned I = 0; I != NumOps - 1; ++I) { + KnownBits SrcOpKnown; + computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, + DemandedElts, Depth + 1); + Known.insertBits(SrcOpKnown, I * OpSize); + } + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + Register SrcReg = MI.getOperand(NumOps - 1).getReg(); + if (MRI.getType(SrcReg).isVector()) + return; // TODO: Handle vectors. + + KnownBits SrcOpKnown; + computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1); + + // Figure out the result operand index + unsigned DstIdx = 0; + for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; + ++DstIdx) + ; + + Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); + break; + } + case TargetOpcode::G_BSWAP: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.byteSwap(); + break; + } + case TargetOpcode::G_BITREVERSE: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.reverseBits(); + break; + } + } + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); LLVM_DEBUG(dumpResult(MI, Known, Depth)); @@ -469,17 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, ComputeKnownBitsCache[R] = Known; } -/// Compute number of sign bits for the intersection of \p Src0 and \p Src1 -unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, - const APInt &DemandedElts, - unsigned Depth) { - // Test src1 first, since we canonicalize simpler expressions to the RHS. 
- unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); - if (Src1SignBits == 1) - return 1; - return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); -} - +/// Compute number of sign bits for the intersection of \p Src0 and \p Src1 +unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. + unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); + if (Src1SignBits == 1) + return 1; + return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); +} + unsigned GISelKnownBits::computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth) { @@ -523,31 +523,31 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } - case TargetOpcode::G_SEXT_INREG: { - // Max of the input and what this extends. - Register Src = MI.getOperand(1).getReg(); - unsigned SrcBits = MI.getOperand(2).getImm(); - unsigned InRegBits = TyBits - SrcBits + 1; - return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); - } + case TargetOpcode::G_SEXT_INREG: { + // Max of the input and what this extends. + Register Src = MI.getOperand(1).getReg(); + unsigned SrcBits = MI.getOperand(2).getImm(); + unsigned InRegBits = TyBits - SrcBits + 1; + return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); + } case TargetOpcode::G_SEXTLOAD: { - // FIXME: We need an in-memory type representation. - if (DstTy.isVector()) - return 1; - - // e.g. i16->i32 = '17' bits known. - const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits() + 1; - } - case TargetOpcode::G_ZEXTLOAD: { - // FIXME: We need an in-memory type representation. - if (DstTy.isVector()) - return 1; - - // e.g. i16->i32 = '16' bits known. - const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits(); - } + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '17' bits known. + const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits() + 1; + } + case TargetOpcode::G_ZEXTLOAD: { + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '16' bits known. 
+ const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits(); + } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src); @@ -560,11 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, return NumSrcSignBits - (NumSrcBits - DstTyBits); break; } - case TargetOpcode::G_SELECT: { - return computeNumSignBitsMin(MI.getOperand(2).getReg(), - MI.getOperand(3).getReg(), DemandedElts, - Depth + 1); - } + case TargetOpcode::G_SELECT: { + return computeNumSignBitsMin(MI.getOperand(2).getReg(), + MI.getOperand(3).getReg(), DemandedElts, + Depth + 1); + } case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: default: { diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp index b7883cbc31..c81add2e6b 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -29,11 +29,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -50,13 +50,13 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -74,7 +74,7 @@ #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> -#include <cstddef> +#include <cstddef> #include <cstdint> #include <iterator> #include <string> @@ -95,8 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) @@ -117,8 +117,8 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator(CodeGenOpt::Level optlevel) - : MachineFunctionPass(ID), OptLevel(optlevel) {} +IRTranslator::IRTranslator(CodeGenOpt::Level optlevel) + : MachineFunctionPass(ID), OptLevel(optlevel) {} #ifndef NDEBUG namespace { @@ -162,17 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); - if (OptLevel != CodeGenOpt::None) - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); 
getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } IRTranslator::ValueToVRegInfo::VRegListT & IRTranslator::allocateVRegs(const Value &Val) { - auto VRegsIt = VMap.findVRegs(Val); - if (VRegsIt != VMap.vregs_end()) - return *VRegsIt->second; + auto VRegsIt = VMap.findVRegs(Val); + if (VRegsIt != VMap.vregs_end()) + return *VRegsIt->second; auto *Regs = VMap.getVRegs(Val); auto *Offsets = VMap.getOffsets(Val); SmallVector<LLT, 4> SplitTys; @@ -234,9 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) { } int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { - auto MapEntry = FrameIndices.find(&AI); - if (MapEntry != FrameIndices.end()) - return MapEntry->second; + auto MapEntry = FrameIndices.find(&AI); + if (MapEntry != FrameIndices.end()) + return MapEntry->second; uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); uint64_t Size = @@ -306,8 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, return true; } -bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, - MachineIRBuilder &MIRBuilder) { +bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); uint16_t Flags = 0; @@ -315,14 +315,14 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); } - MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags); + MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags); return true; } -bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { - return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder); -} - +bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { + return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder); +} + bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { auto *CI = dyn_cast<CmpInst>(&U); @@ -368,289 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); -} - -void IRTranslator::emitBranchForMergedCondition( - const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - BranchProbability TProb, BranchProbability FProb, bool InvertCond) { - // If the leaf of the tree is a comparison, merge the condition into - // the caseblock. - if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { - CmpInst::Predicate Condition; - if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { - Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); - } else { - const FCmpInst *FC = cast<FCmpInst>(Cond); - Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate(); - } - - SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0), - BOp->getOperand(1), nullptr, TBB, FBB, CurBB, - CurBuilder->getDebugLoc(), TProb, FProb); - SL->SwitchCases.push_back(CB); - return; - } - - // Create a CaseBlock record representing this branch. - CmpInst::Predicate Pred = InvertCond ? 
CmpInst::ICMP_NE : CmpInst::ICMP_EQ; - SwitchCG::CaseBlock CB( - Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()), - nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb); - SL->SwitchCases.push_back(CB); -} - -static bool isValInBlock(const Value *V, const BasicBlock *BB) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getParent() == BB; - return true; -} - -void IRTranslator::findMergedConditions( - const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - Instruction::BinaryOps Opc, BranchProbability TProb, - BranchProbability FProb, bool InvertCond) { - using namespace PatternMatch; - assert((Opc == Instruction::And || Opc == Instruction::Or) && - "Expected Opc to be AND/OR"); - // Skip over not part of the tree and remember to invert op and operands at - // next level. - Value *NotCond; - if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && - isValInBlock(NotCond, CurBB->getBasicBlock())) { - findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, - !InvertCond); - return; - } - - const Instruction *BOp = dyn_cast<Instruction>(Cond); - const Value *BOpOp0, *BOpOp1; - // Compute the effective opcode for Cond, taking into account whether it needs - // to be inverted, e.g. - // and (not (or A, B)), C - // gets lowered as - // and (and (not A, not B), C) - Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; - if (BOp) { - BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) - ? Instruction::And - : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) - ? Instruction::Or - : (Instruction::BinaryOps)0); - if (InvertCond) { - if (BOpc == Instruction::And) - BOpc = Instruction::Or; - else if (BOpc == Instruction::Or) - BOpc = Instruction::And; - } - } - - // If this node is not part of the or/and tree, emit it as a branch. - // Note that all nodes in the tree should have same opcode. - bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); - if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || - !isValInBlock(BOpOp0, CurBB->getBasicBlock()) || - !isValInBlock(BOpOp1, CurBB->getBasicBlock())) { - emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, - InvertCond); - return; - } - - // Create TmpBB after CurBB. - MachineFunction::iterator BBI(CurBB); - MachineBasicBlock *TmpBB = - MF->CreateMachineBasicBlock(CurBB->getBasicBlock()); - CurBB->getParent()->insert(++BBI, TmpBB); - - if (Opc == Instruction::Or) { - // Codegen X | Y as: - // BB1: - // jmp_if_X TBB - // jmp TmpBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // - - // We have flexibility in setting Prob for BB1 and Prob for TmpBB. - // The requirement is that - // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) - // = TrueProb for original BB. - // Assuming the original probabilities are A and B, one choice is to set - // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to - // A/(1+B) and 2B/(1+B). This choice assumes that - // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. - // Another choice is to assume TrueProb for BB1 equals to TrueProb for - // TmpBB, but the math is more complicated. - - auto NewTrueProb = TProb / 2; - auto NewFalseProb = TProb / 2 + FProb; - // Emit the LHS condition. - findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, - NewFalseProb, InvertCond); - - // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). 
- SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; - BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); - // Emit the RHS condition into TmpBB. - findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], - Probs[1], InvertCond); - } else { - assert(Opc == Instruction::And && "Unknown merge op!"); - // Codegen X & Y as: - // BB1: - // jmp_if_X TmpBB - // jmp FBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // - // This requires creation of TmpBB after CurBB. - - // We have flexibility in setting Prob for BB1 and Prob for TmpBB. - // The requirement is that - // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) - // = FalseProb for original BB. - // Assuming the original probabilities are A and B, one choice is to set - // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to - // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == - // TrueProb for BB1 * FalseProb for TmpBB. - - auto NewTrueProb = TProb + FProb / 2; - auto NewFalseProb = FProb / 2; - // Emit the LHS condition. - findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, - NewFalseProb, InvertCond); - - // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). - SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; - BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); - // Emit the RHS condition into TmpBB. - findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], - Probs[1], InvertCond); - } -} - -bool IRTranslator::shouldEmitAsBranches( - const std::vector<SwitchCG::CaseBlock> &Cases) { - // For multiple cases, it's better to emit as branches. - if (Cases.size() != 2) - return true; - - // If this is two comparisons of the same values or'd or and'd together, they - // will get folded into a single comparison, so don't emit two blocks. - if ((Cases[0].CmpLHS == Cases[1].CmpLHS && - Cases[0].CmpRHS == Cases[1].CmpRHS) || - (Cases[0].CmpRHS == Cases[1].CmpLHS && - Cases[0].CmpLHS == Cases[1].CmpRHS)) { - return false; - } - - // Handle: (X != null) | (Y != null) --> (X|Y) != 0 - // Handle: (X == null) & (Y == null) --> (X|Y) == 0 - if (Cases[0].CmpRHS == Cases[1].CmpRHS && - Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred && - isa<Constant>(Cases[0].CmpRHS) && - cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { - if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ && - Cases[0].TrueBB == Cases[1].ThisBB) - return false; - if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE && - Cases[0].FalseBB == Cases[1].ThisBB) - return false; - } - - return true; -} - + return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); +} + +void IRTranslator::emitBranchForMergedCondition( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + BranchProbability TProb, BranchProbability FProb, bool InvertCond) { + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + CmpInst::Predicate Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); + Condition = InvertCond ? 
FC->getInversePredicate() : FC->getPredicate(); + } + + SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0), + BOp->getOperand(1), nullptr, TBB, FBB, CurBB, + CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); + return; + } + + // Create a CaseBlock record representing this branch. + CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + SwitchCG::CaseBlock CB( + Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); +} + +static bool isValInBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +void IRTranslator::findMergedConditions( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond) { + using namespace PatternMatch; + assert((Opc == Instruction::And || Opc == Instruction::Or) && + "Expected Opc to be AND/OR"); + // Skip over not part of the tree and remember to invert op and operands at + // next level. + Value *NotCond; + if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && + isValInBlock(NotCond, CurBB->getBasicBlock())) { + findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + + const Instruction *BOp = dyn_cast<Instruction>(Cond); + const Value *BOpOp0, *BOpOp1; + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g. + // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; + if (BOp) { + BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::And + : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::Or + : (Instruction::BinaryOps)0); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. + // Note that all nodes in the tree should have same opcode. + bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); + if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || + !isValInBlock(BOpOp0, CurBB->getBasicBlock()) || + !isValInBlock(BOpOp1, CurBB->getBasicBlock())) { + emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, + InvertCond); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI(CurBB); + MachineBasicBlock *TmpBB = + MF->CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 
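// (Editorial annotation, not part of the diff: a worked check of the scheme above
// with A = B = 1/2. BB1 then gets {1/4, 3/4} and TmpBB, after normalizing
// {A/2, B} = {1/4, 1/2}, gets {1/3, 2/3} = {A/(1+B), 2B/(1+B)}; indeed
// 1/4 + 3/4 * 1/3 = 1/2 = A, and 1/4 == 3/4 * 1/3 as the choice assumes.)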
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } +} + +bool IRTranslator::shouldEmitAsBranches( + const std::vector<SwitchCG::CaseBlock> &Cases) { + // For multiple cases, it's better to emit as branches. + if (Cases.size() != 2) + return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ && + Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE && + Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { const BranchInst &BrInst = cast<BranchInst>(U); - auto &CurMBB = MIRBuilder.getMBB(); - auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0)); - - if (BrInst.isUnconditional()) { - // If the unconditional target is the layout successor, fallthrough. - if (!CurMBB.isLayoutSuccessor(Succ0MBB)) - MIRBuilder.buildBr(*Succ0MBB); - - // Link successors. 
- for (const BasicBlock *Succ : successors(&BrInst)) - CurMBB.addSuccessor(&getMBB(*Succ)); - return true; - } - - // If this condition is one of the special cases we handle, do special stuff - // now. - const Value *CondVal = BrInst.getCondition(); - MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); - - const auto &TLI = *MF->getSubtarget().getTargetLowering(); - - // If this is a series of conditions that are or'd or and'd together, emit - // this as a sequence of branches instead of setcc's with and/or operations. - // As long as jumps are not expensive (exceptions for multi-use logic ops, - // unpredictable branches, and vector extracts because those jumps are likely - // expensive for any target), this should improve performance. - // For example, instead of something like: - // cmp A, B - // C = seteq - // cmp D, E - // F = setle - // or C, F - // jnz foo - // Emit: - // cmp A, B - // je foo - // cmp D, E - // jle foo - using namespace PatternMatch; - const Instruction *CondI = dyn_cast<Instruction>(CondVal); - if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && - !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { - Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; - Value *Vec; - const Value *BOp0, *BOp1; - if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) - Opcode = Instruction::And; - else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) - Opcode = Instruction::Or; - - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { - findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode, - getEdgeProbability(&CurMBB, Succ0MBB), - getEdgeProbability(&CurMBB, Succ1MBB), - /*InvertCond=*/false); - assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!"); - - // Allow some cases to be rejected. - if (shouldEmitAsBranches(SL->SwitchCases)) { - // Emit the branch for this block. - emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder); - SL->SwitchCases.erase(SL->SwitchCases.begin()); - return true; - } - - // Okay, we decided not to do this, remove any inserted MBB's and clear - // SwitchCases. - for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I) - MF->erase(SL->SwitchCases[I].ThisBB); - - SL->SwitchCases.clear(); - } - } - - // Create a CaseBlock record representing this branch. - SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal, - ConstantInt::getTrue(MF->getFunction().getContext()), - nullptr, Succ0MBB, Succ1MBB, &CurMBB, - CurBuilder->getDebugLoc()); - - // Use emitSwitchCase to actually insert the fast branch sequence for this - // cond branch. - emitSwitchCase(CB, &CurMBB, *CurBuilder); + auto &CurMBB = MIRBuilder.getMBB(); + auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0)); + + if (BrInst.isUnconditional()) { + // If the unconditional target is the layout successor, fallthrough. + if (!CurMBB.isLayoutSuccessor(Succ0MBB)) + MIRBuilder.buildBr(*Succ0MBB); + + // Link successors. + for (const BasicBlock *Succ : successors(&BrInst)) + CurMBB.addSuccessor(&getMBB(*Succ)); + return true; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = BrInst.getCondition(); + MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); + + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. 
+ // As long as jumps are not expensive (exceptions for multi-use logic ops, + // unpredictable branches, and vector extracts because those jumps are likely + // expensive for any target), this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + using namespace PatternMatch; + const Instruction *CondI = dyn_cast<Instruction>(CondVal); + if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && + !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; + Value *Vec; + const Value *BOp0, *BOp1; + if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::And; + else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::Or; + + if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode, + getEdgeProbability(&CurMBB, Succ0MBB), + getEdgeProbability(&CurMBB, Succ1MBB), + /*InvertCond=*/false); + assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (shouldEmitAsBranches(SL->SwitchCases)) { + // Emit the branch for this block. + emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder); + SL->SwitchCases.erase(SL->SwitchCases.begin()); + return true; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I) + MF->erase(SL->SwitchCases[I].ThisBB); + + SL->SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal, + ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, Succ0MBB, Succ1MBB, &CurMBB, + CurBuilder->getDebugLoc()); + + // Use emitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + emitSwitchCase(CB, &CurMBB, *CurBuilder); return true; } @@ -715,7 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { } SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); - SL->findBitTestClusters(Clusters, &SI); + SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -836,22 +836,22 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, const LLT i1Ty = LLT::scalar(1); // Build the compare. if (!CB.CmpMHS) { - const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); - // For conditional branch lowering, we might try to do something silly like - // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, - // just re-use the existing condition vreg. - if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && - CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { - Cond = CondLHS; - } else { - Register CondRHS = getOrCreateVReg(*CB.CmpRHS); - if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) - Cond = - MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); - else - Cond = - MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); - } + const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); + // For conditional branch lowering, we might try to do something silly like + // emit an G_ICMP to compare an existing G_ICMP i1 result with true. 
If so, + // just re-use the existing condition vreg. + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + Cond = CondLHS; + } else { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) + Cond = + MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + else + Cond = + MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + } } else { assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && "Can only handle SLE ranges"); @@ -884,8 +884,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb); CB.ThisBB->normalizeSuccProbs(); - addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, - CB.ThisBB); + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, + CB.ThisBB); MIB.buildBrCond(Cond, *CB.TrueBB); MIB.buildBr(*CB.FalseBB); @@ -998,156 +998,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, return true; } -void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, - MachineBasicBlock *SwitchBB) { - MachineIRBuilder &MIB = *CurBuilder; - MIB.setMBB(*SwitchBB); - - // Subtract the minimum value. - Register SwitchOpReg = getOrCreateVReg(*B.SValue); - - LLT SwitchOpTy = MRI->getType(SwitchOpReg); - Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); - auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); - - // Ensure that the type will fit the mask value. - LLT MaskTy = SwitchOpTy; - for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { - if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { - // Switch table case range are encoded into series of masks. - // Just use pointer type, it's guaranteed to fit. - MaskTy = LLT::scalar(64); - break; - } - } - Register SubReg = RangeSub.getReg(0); - if (SwitchOpTy != MaskTy) - SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0); - - B.RegVT = getMVTForLLT(MaskTy); - B.Reg = SubReg; - - MachineBasicBlock *MBB = B.Cases[0].ThisBB; - - if (!B.OmitRangeCheck) - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); - addSuccessorWithProb(SwitchBB, MBB, B.Prob); - - SwitchBB->normalizeSuccProbs(); - - if (!B.OmitRangeCheck) { - // Conditional branch to the default block. - auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); - auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), - RangeSub, RangeCst); - MIB.buildBrCond(RangeCmp, *B.Default); - } - - // Avoid emitting unnecessary branches to the next block. - if (MBB != SwitchBB->getNextNode()) - MIB.buildBr(*MBB); -} - -void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, - MachineBasicBlock *NextMBB, - BranchProbability BranchProbToNext, - Register Reg, SwitchCG::BitTestCase &B, - MachineBasicBlock *SwitchBB) { - MachineIRBuilder &MIB = *CurBuilder; - MIB.setMBB(*SwitchBB); - - LLT SwitchTy = getLLTForMVT(BB.RegVT); - Register Cmp; - unsigned PopCount = countPopulation(B.Mask); - if (PopCount == 1) { - // Testing for a single bit; just compare the shift count with what it - // would need to be to shift a 1 bit in that position. - auto MaskTrailingZeros = - MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); - Cmp = - MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) - .getReg(0); - } else if (PopCount == BB.Range) { - // There is only one zero bit in the range, test for it directly. 
- auto MaskTrailingOnes = - MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) - .getReg(0); - } else { - // Make desired shift. - auto CstOne = MIB.buildConstant(SwitchTy, 1); - auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg); - - // Emit bit tests and jumps. - auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); - auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); - auto CstZero = MIB.buildConstant(SwitchTy, 0); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) - .getReg(0); - } - - // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. - addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); - // The branch probability from SwitchBB to NextMBB is BranchProbToNext. - addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); - // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is - // one as they are relative probabilities (and thus work more like weights), - // and hence we need to normalize them to let the sum of them become one. - SwitchBB->normalizeSuccProbs(); - - // Record the fact that the IR edge from the header to the bit test target - // will go through our new block. Neeeded for PHIs to have nodes added. - addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()}, - SwitchBB); - - MIB.buildBrCond(Cmp, *B.TargetBB); - - // Avoid emitting unnecessary branches to the next block. - if (NextMBB != SwitchBB->getNextNode()) - MIB.buildBr(*NextMBB); -} - -bool IRTranslator::lowerBitTestWorkItem( - SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, - MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, - MachineIRBuilder &MIB, MachineFunction::iterator BBI, - BranchProbability DefaultProb, BranchProbability UnhandledProbs, - SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable) { - using namespace SwitchCG; - MachineFunction *CurMF = SwitchMBB->getParent(); - // FIXME: Optimize away range check based on pivot comparisons. - BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; - // The bit test blocks haven't been inserted yet; insert them here. - for (BitTestCase &BTC : BTB->Cases) - CurMF->insert(BBI, BTC.ThisBB); - - // Fill in fields of the BitTestBlock. - BTB->Parent = CurMBB; - BTB->Default = Fallthrough; - - BTB->DefaultProb = UnhandledProbs; - // If the cases in bit test don't form a contiguous range, we evenly - // distribute the probability on the edge to Fallthrough to two - // successors of CurMBB. - if (!BTB->ContiguousRange) { - BTB->Prob += DefaultProb / 2; - BTB->DefaultProb -= DefaultProb / 2; - } - - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - BTB->OmitRangeCheck = true; - } - - // If we're in the right place, emit the bit test header right now. - if (CurMBB == SwitchMBB) { - emitBitTestHeader(*BTB, SwitchMBB); - BTB->Emitted = true; - } - return true; -} - +void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + // Subtract the minimum value. + Register SwitchOpReg = getOrCreateVReg(*B.SValue); + + LLT SwitchOpTy = MRI->getType(SwitchOpReg); + Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); + auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); + + // Ensure that the type will fit the mask value. 
+ LLT MaskTy = SwitchOpTy; + for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { + if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + MaskTy = LLT::scalar(64); + break; + } + } + Register SubReg = RangeSub.getReg(0); + if (SwitchOpTy != MaskTy) + SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0); + + B.RegVT = getMVTForLLT(MaskTy); + B.Reg = SubReg; + + MachineBasicBlock *MBB = B.Cases[0].ThisBB; + + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + + SwitchBB->normalizeSuccProbs(); + + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. + auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); + auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), + RangeSub, RangeCst); + MIB.buildBrCond(RangeCmp, *B.Default); + } + + // Avoid emitting unnecessary branches to the next block. + if (MBB != SwitchBB->getNextNode()) + MIB.buildBr(*MBB); +} + +void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, + MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, + Register Reg, SwitchCG::BitTestCase &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + LLT SwitchTy = getLLTForMVT(BB.RegVT); + Register Cmp; + unsigned PopCount = countPopulation(B.Mask); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + auto MaskTrailingZeros = + MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); + Cmp = + MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) + .getReg(0); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + auto MaskTrailingOnes = + MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) + .getReg(0); + } else { + // Make desired shift. + auto CstOne = MIB.buildConstant(SwitchTy, 1); + auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg); + + // Emit bit tests and jumps. + auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); + auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); + auto CstZero = MIB.buildConstant(SwitchTy, 0); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) + .getReg(0); + } + + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); + + // Record the fact that the IR edge from the header to the bit test target + // will go through our new block. Neeeded for PHIs to have nodes added. + addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()}, + SwitchBB); + + MIB.buildBrCond(Cmp, *B.TargetBB); + + // Avoid emitting unnecessary branches to the next block. 
+ if (NextMBB != SwitchBB->getNextNode()) + MIB.buildBr(*NextMBB); +} + +bool IRTranslator::lowerBitTestWorkItem( + SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, MachineFunction::iterator BBI, + BranchProbability DefaultProb, BranchProbability UnhandledProbs, + SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable) { + using namespace SwitchCG; + MachineFunction *CurMF = SwitchMBB->getParent(); + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { + emitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + return true; +} + bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, @@ -1208,15 +1208,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, switch (I->Kind) { case CC_BitTests: { - if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, - DefaultProb, UnhandledProbs, I, Fallthrough, - FallthroughUnreachable)) { - LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch"); - return false; - } - break; + if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, + DefaultProb, UnhandledProbs, I, Fallthrough, + FallthroughUnreachable)) { + LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch"); + return false; + } + break; } - + case CC_JumpTable: { if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, UnhandledProbs, I, Fallthrough, @@ -1557,34 +1557,34 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned Opcode) { + unsigned Opcode) { // If the source is undef, then just emit a nop. if (isa<UndefValue>(CI.getArgOperand(1))) return true; - SmallVector<Register, 3> SrcRegs; - - unsigned MinPtrSize = UINT_MAX; - for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) { - Register SrcReg = getOrCreateVReg(**AI); - LLT SrcTy = MRI->getType(SrcReg); - if (SrcTy.isPointer()) - MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); - SrcRegs.push_back(SrcReg); - } - - LLT SizeTy = LLT::scalar(MinPtrSize); - - // The size operand should be the minimum of the pointer sizes. 
- Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; - if (MRI->getType(SizeOpReg) != SizeTy) - SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0); - - auto ICall = MIRBuilder.buildInstr(Opcode); - for (Register SrcReg : SrcRegs) - ICall.addUse(SrcReg); - + SmallVector<Register, 3> SrcRegs; + + unsigned MinPtrSize = UINT_MAX; + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) { + Register SrcReg = getOrCreateVReg(**AI); + LLT SrcTy = MRI->getType(SrcReg); + if (SrcTy.isPointer()) + MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); + SrcRegs.push_back(SrcReg); + } + + LLT SizeTy = LLT::scalar(MinPtrSize); + + // The size operand should be the minimum of the pointer sizes. + Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; + if (MRI->getType(SizeOpReg) != SizeTy) + SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0); + + auto ICall = MIRBuilder.buildInstr(Opcode); + for (Register SrcReg : SrcRegs) + ICall.addUse(SrcReg); + Align DstAlign; Align SrcAlign; unsigned IsVol = @@ -1612,7 +1612,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(0)), MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); - if (Opcode != TargetOpcode::G_MEMSET) + if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(1)), MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); @@ -1651,16 +1651,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, return true; } -bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, - MachineIRBuilder &MIRBuilder) { - Register Dst = getOrCreateVReg(CI); - Register Src0 = getOrCreateVReg(*CI.getOperand(0)); - Register Src1 = getOrCreateVReg(*CI.getOperand(1)); - uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue(); - MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale }); - return true; -} - +bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + Register Dst = getOrCreateVReg(CI); + Register Src0 = getOrCreateVReg(*CI.getOperand(0)); + Register Src1 = getOrCreateVReg(*CI.getOperand(1)); + uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue(); + MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale }); + return true; +} + unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { switch (ID) { default: @@ -1711,14 +1711,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FNEARBYINT; case Intrinsic::pow: return TargetOpcode::G_FPOW; - case Intrinsic::powi: - return TargetOpcode::G_FPOWI; + case Intrinsic::powi: + return TargetOpcode::G_FPOWI; case Intrinsic::rint: return TargetOpcode::G_FRINT; case Intrinsic::round: return TargetOpcode::G_INTRINSIC_ROUND; - case Intrinsic::roundeven: - return TargetOpcode::G_INTRINSIC_ROUNDEVEN; + case Intrinsic::roundeven: + return TargetOpcode::G_INTRINSIC_ROUNDEVEN; case Intrinsic::sin: return TargetOpcode::G_FSIN; case Intrinsic::sqrt: @@ -1729,31 +1729,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_READCYCLECOUNTER; case Intrinsic::ptrmask: return TargetOpcode::G_PTRMASK; - case Intrinsic::lrint: - return TargetOpcode::G_INTRINSIC_LRINT; - // FADD/FMUL require checking the FMF, so are handled elsewhere. 
- case Intrinsic::vector_reduce_fmin: - return TargetOpcode::G_VECREDUCE_FMIN; - case Intrinsic::vector_reduce_fmax: - return TargetOpcode::G_VECREDUCE_FMAX; - case Intrinsic::vector_reduce_add: - return TargetOpcode::G_VECREDUCE_ADD; - case Intrinsic::vector_reduce_mul: - return TargetOpcode::G_VECREDUCE_MUL; - case Intrinsic::vector_reduce_and: - return TargetOpcode::G_VECREDUCE_AND; - case Intrinsic::vector_reduce_or: - return TargetOpcode::G_VECREDUCE_OR; - case Intrinsic::vector_reduce_xor: - return TargetOpcode::G_VECREDUCE_XOR; - case Intrinsic::vector_reduce_smax: - return TargetOpcode::G_VECREDUCE_SMAX; - case Intrinsic::vector_reduce_smin: - return TargetOpcode::G_VECREDUCE_SMIN; - case Intrinsic::vector_reduce_umax: - return TargetOpcode::G_VECREDUCE_UMAX; - case Intrinsic::vector_reduce_umin: - return TargetOpcode::G_VECREDUCE_UMIN; + case Intrinsic::lrint: + return TargetOpcode::G_INTRINSIC_LRINT; + // FADD/FMUL require checking the FMF, so are handled elsewhere. + case Intrinsic::vector_reduce_fmin: + return TargetOpcode::G_VECREDUCE_FMIN; + case Intrinsic::vector_reduce_fmax: + return TargetOpcode::G_VECREDUCE_FMAX; + case Intrinsic::vector_reduce_add: + return TargetOpcode::G_VECREDUCE_ADD; + case Intrinsic::vector_reduce_mul: + return TargetOpcode::G_VECREDUCE_MUL; + case Intrinsic::vector_reduce_and: + return TargetOpcode::G_VECREDUCE_AND; + case Intrinsic::vector_reduce_or: + return TargetOpcode::G_VECREDUCE_OR; + case Intrinsic::vector_reduce_xor: + return TargetOpcode::G_VECREDUCE_XOR; + case Intrinsic::vector_reduce_smax: + return TargetOpcode::G_VECREDUCE_SMAX; + case Intrinsic::vector_reduce_smin: + return TargetOpcode::G_VECREDUCE_SMIN; + case Intrinsic::vector_reduce_umax: + return TargetOpcode::G_VECREDUCE_UMAX; + case Intrinsic::vector_reduce_umin: + return TargetOpcode::G_VECREDUCE_UMIN; } return Intrinsic::not_intrinsic; } @@ -1846,7 +1846,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // Get the underlying objects for the location passed on the lifetime // marker. SmallVector<const Value *, 4> Allocas; - getUnderlyingObjects(CI.getArgOperand(1), Allocas); + getUnderlyingObjects(CI.getArgOperand(1), Allocas); // Iterate over each underlying object, creating lifetime markers for each // static alloca. Quit if we find a non-static alloca. @@ -1960,37 +1960,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder); case Intrinsic::ssub_sat: return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder); - case Intrinsic::ushl_sat: - return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); - case Intrinsic::sshl_sat: - return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); - case Intrinsic::umin: - return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); - case Intrinsic::umax: - return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder); - case Intrinsic::smin: - return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder); - case Intrinsic::smax: - return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder); - case Intrinsic::abs: - // TODO: Preserve "int min is poison" arg in GMIR? 
- return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder); - case Intrinsic::smul_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder); - case Intrinsic::umul_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder); - case Intrinsic::smul_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder); - case Intrinsic::umul_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder); - case Intrinsic::sdiv_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder); - case Intrinsic::udiv_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder); - case Intrinsic::sdiv_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder); - case Intrinsic::udiv_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); + case Intrinsic::ushl_sat: + return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); + case Intrinsic::sshl_sat: + return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); + case Intrinsic::umin: + return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); + case Intrinsic::umax: + return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder); + case Intrinsic::smin: + return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder); + case Intrinsic::smax: + return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder); + case Intrinsic::abs: + // TODO: Preserve "int min is poison" arg in GMIR? + return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder); + case Intrinsic::smul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder); + case Intrinsic::umul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder); + case Intrinsic::smul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder); + case Intrinsic::umul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder); + case Intrinsic::sdiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder); + case Intrinsic::udiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder); + case Intrinsic::sdiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder); + case Intrinsic::udiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); @@ -2014,24 +2014,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } return true; } - case Intrinsic::convert_from_fp16: - // FIXME: This intrinsic should probably be removed from the IR. - MIRBuilder.buildFPExt(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; - case Intrinsic::convert_to_fp16: - // FIXME: This intrinsic should probably be removed from the IR. - MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; + case Intrinsic::convert_from_fp16: + // FIXME: This intrinsic should probably be removed from the IR. 
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + case Intrinsic::convert_to_fp16: + // FIXME: This intrinsic should probably be removed from the IR. + MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; case Intrinsic::memcpy: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); case Intrinsic::memset: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -2114,18 +2114,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } case Intrinsic::invariant_end: return true; - case Intrinsic::expect: - case Intrinsic::annotation: - case Intrinsic::ptr_annotation: - case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: { - // Drop the intrinsic, but forward the value. - MIRBuilder.buildCopy(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0))); - return true; - } + case Intrinsic::expect: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: { + // Drop the intrinsic, but forward the value. + MIRBuilder.buildCopy(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0))); + return true; + } case Intrinsic::assume: - case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. @@ -2145,68 +2145,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } - case Intrinsic::localescape: { - MachineBasicBlock &EntryMBB = MF->front(); - StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); - - // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission - // is the same on all targets. - for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { - Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); - if (isa<ConstantPointerNull>(Arg)) - continue; // Skip null pointers. They represent a hole in index space. - - int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); - MCSymbol *FrameAllocSym = - MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, - Idx); - - // This should be inserted at the start of the entry block. - auto LocalEscape = - MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) - .addSym(FrameAllocSym) - .addFrameIndex(FI); - - EntryMBB.insert(EntryMBB.begin(), LocalEscape); - } - - return true; - } - case Intrinsic::vector_reduce_fadd: - case Intrinsic::vector_reduce_fmul: { - // Need to check for the reassoc flag to decide whether we want a - // sequential reduction opcode or not. 
- Register Dst = getOrCreateVReg(CI); - Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0)); - Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1)); - unsigned Opc = 0; - if (!CI.hasAllowReassoc()) { - // The sequential ordering case. - Opc = ID == Intrinsic::vector_reduce_fadd - ? TargetOpcode::G_VECREDUCE_SEQ_FADD - : TargetOpcode::G_VECREDUCE_SEQ_FMUL; - MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc}, - MachineInstr::copyFlagsFromInstruction(CI)); - return true; - } - // We split the operation into a separate G_FADD/G_FMUL + the reduce, - // since the associativity doesn't matter. - unsigned ScalarOpc; - if (ID == Intrinsic::vector_reduce_fadd) { - Opc = TargetOpcode::G_VECREDUCE_FADD; - ScalarOpc = TargetOpcode::G_FADD; - } else { - Opc = TargetOpcode::G_VECREDUCE_FMUL; - ScalarOpc = TargetOpcode::G_FMUL; - } - LLT DstTy = MRI->getType(Dst); - auto Rdx = MIRBuilder.buildInstr( - Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI)); - MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx}, - MachineInstr::copyFlagsFromInstruction(CI)); - - return true; - } + case Intrinsic::localescape: { + MachineBasicBlock &EntryMBB = MF->front(); + StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + + int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); + MCSymbol *FrameAllocSym = + MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, + Idx); + + // This should be inserted at the start of the entry block. + auto LocalEscape = + MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + + EntryMBB.insert(EntryMBB.begin(), LocalEscape); + } + + return true; + } + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + // Need to check for the reassoc flag to decide whether we want a + // sequential reduction opcode or not. + Register Dst = getOrCreateVReg(CI); + Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0)); + Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1)); + unsigned Opc = 0; + if (!CI.hasAllowReassoc()) { + // The sequential ordering case. + Opc = ID == Intrinsic::vector_reduce_fadd + ? TargetOpcode::G_VECREDUCE_SEQ_FADD + : TargetOpcode::G_VECREDUCE_SEQ_FMUL; + MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc}, + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } + // We split the operation into a separate G_FADD/G_FMUL + the reduce, + // since the associativity doesn't matter. 
+ unsigned ScalarOpc; + if (ID == Intrinsic::vector_reduce_fadd) { + Opc = TargetOpcode::G_VECREDUCE_FADD; + ScalarOpc = TargetOpcode::G_FADD; + } else { + Opc = TargetOpcode::G_VECREDUCE_FMUL; + ScalarOpc = TargetOpcode::G_FMUL; + } + LLT DstTy = MRI->getType(Dst); + auto Rdx = MIRBuilder.buildInstr( + Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx}, + MachineInstr::copyFlagsFromInstruction(CI)); + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -2328,11 +2328,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } else { MIB.addFPImm(cast<ConstantFP>(Arg.value())); } - } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { - auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); - if (!MDN) // This was probably an MDString. - return false; - MIB.addMetadata(MDN); + } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { + auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); + if (!MDN) // This was probably an MDString. + return false; + MIB.addMetadata(MDN); } else { ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); if (VRegs.size() > 1) @@ -2357,62 +2357,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return true; } -bool IRTranslator::findUnwindDestinations( - const BasicBlock *EHPadBB, - BranchProbability Prob, - SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> - &UnwindDests) { - EHPersonality Personality = classifyEHPersonality( - EHPadBB->getParent()->getFunction().getPersonalityFn()); - bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; - bool IsCoreCLR = Personality == EHPersonality::CoreCLR; - bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; - bool IsSEH = isAsynchronousEHPersonality(Personality); - - if (IsWasmCXX) { - // Ignore this for now. - return false; - } - - while (EHPadBB) { - const Instruction *Pad = EHPadBB->getFirstNonPHI(); - BasicBlock *NewEHPadBB = nullptr; - if (isa<LandingPadInst>(Pad)) { - // Stop on landingpads. They are not funclets. - UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); - break; - } - if (isa<CleanupPadInst>(Pad)) { - // Stop on cleanup pads. Cleanups are always funclet entries for all known - // personalities. - UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); - UnwindDests.back().first->setIsEHScopeEntry(); - UnwindDests.back().first->setIsEHFuncletEntry(); - break; - } - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { - // Add the catchpad handlers to the possible destinations. - for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { - UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob); - // For MSVC++ and the CLR, catchblocks are funclets and need prologues. 
- if (IsMSVCCXX || IsCoreCLR) - UnwindDests.back().first->setIsEHFuncletEntry(); - if (!IsSEH) - UnwindDests.back().first->setIsEHScopeEntry(); - } - NewEHPadBB = CatchSwitch->getUnwindDest(); - } else { - continue; - } - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (BPI && NewEHPadBB) - Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); - EHPadBB = NewEHPadBB; - } - return true; -} - +bool IRTranslator::findUnwindDestinations( + const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = classifyEHPersonality( + EHPadBB->getParent()->getFunction().getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; + bool IsSEH = isAsynchronousEHPersonality(Personality); + + if (IsWasmCXX) { + // Ignore this for now. + return false; + } + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + break; + } + if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + UnwindDests.back().first->setIsEHScopeEntry(); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } + return true; +} + bool IRTranslator::translateInvoke(const User &U, MachineIRBuilder &MIRBuilder) { const InvokeInst &I = cast<InvokeInst>(U); @@ -2438,7 +2438,7 @@ bool IRTranslator::translateInvoke(const User &U, return false; // FIXME: support Windows exception handling. - if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) + if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) return false; // Emit the actual call, bracketed by EH_LABELs so that the MF knows about @@ -2452,26 +2452,26 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *EndSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); - SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; - BranchProbabilityInfo *BPI = FuncInfo.BPI; - MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(); - BranchProbability EHPadBBProb = - BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) - : BranchProbability::getZero(); - - if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests)) - return false; - + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(); + BranchProbability EHPadBBProb = + BPI ? 
BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + + if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests)) + return false; + MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB), &ReturnMBB = getMBB(*ReturnBB); - // Update successor info. - addSuccessorWithProb(InvokeMBB, &ReturnMBB); - for (auto &UnwindDest : UnwindDests) { - UnwindDest.first->setIsEHPad(); - addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); - } - InvokeMBB->normalizeSuccProbs(); - + // Update successor info. + addSuccessorWithProb(InvokeMBB, &ReturnMBB); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); MIRBuilder.buildBr(ReturnMBB); return true; @@ -2511,12 +2511,12 @@ bool IRTranslator::translateLandingPad(const User &U, MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) .addSym(MF->addLandingPad(&MBB)); - // If the unwinder does not preserve all registers, ensure that the - // function marks the clobbered registers as used. - const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); - if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) - MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); - + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + LLT Ty = getLLTForType(*LP.getType(), *DL); Register Undef = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildUndef(Undef); @@ -2855,8 +2855,8 @@ bool IRTranslator::translate(const Instruction &Inst) { // We only emit constants into the entry block from here. To prevent jumpy // debug behaviour set the line to 0. if (const DebugLoc &DL = Inst.getDebugLoc()) - EntryBuilder->setDebugLoc(DILocation::get( - Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); + EntryBuilder->setDebugLoc(DILocation::get( + Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); else EntryBuilder->setDebugLoc(DebugLoc()); @@ -2934,57 +2934,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } void IRTranslator::finalizeBasicBlock() { - for (auto &BTB : SL->BitTestCases) { - // Emit header first, if it wasn't already emitted. - if (!BTB.Emitted) - emitBitTestHeader(BTB, BTB.Parent); - - BranchProbability UnhandledProb = BTB.Prob; - for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { - UnhandledProb -= BTB.Cases[j].ExtraProb; - // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *MBB = BTB.Cases[j].ThisBB; - // If all cases cover a contiguous range, it is not necessary to jump to - // the default block after the last bit test fails. This is because the - // range check during bit test header creation has guaranteed that every - // case here doesn't go outside the range. In this case, there is no need - // to perform the last bit test, as it will always be true. Instead, make - // the second-to-last bit-test fall through to the target of the last bit - // test, and delete the last bit test. - - MachineBasicBlock *NextMBB; - if (BTB.ContiguousRange && j + 2 == ej) { - // Second-to-last bit-test with contiguous range: fall through to the - // target of the final bit test. 
- NextMBB = BTB.Cases[j + 1].TargetBB; - } else if (j + 1 == ej) { - // For the last bit test, fall through to Default. - NextMBB = BTB.Default; - } else { - // Otherwise, fall through to the next bit test. - NextMBB = BTB.Cases[j + 1].ThisBB; - } - - emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); - - // FIXME delete this block below? - if (BTB.ContiguousRange && j + 2 == ej) { - // Since we're not going to use the final bit test, remove it. - BTB.Cases.pop_back(); - break; - } - } - // This is "default" BB. We have two jumps to it. From "header" BB and from - // last "case" BB, unless the latter was skipped. - CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(), - BTB.Default->getBasicBlock()}; - addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent); - if (!BTB.ContiguousRange) { - addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB); - } - } - SL->BitTestCases.clear(); - + for (auto &BTB : SL->BitTestCases) { + // Emit header first, if it wasn't already emitted. + if (!BTB.Emitted) + emitBitTestHeader(BTB, BTB.Parent); + + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; + // Set the current basic block to the mbb we wish to insert the code into + MachineBasicBlock *MBB = BTB.Cases[j].ThisBB; + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + + MachineBasicBlock *NextMBB; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. + NextMBB = BTB.Cases[j + 1].ThisBB; + } + + emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); + + // FIXME delete this block below? + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); + break; + } + } + // This is "default" BB. We have two jumps to it. From "header" BB and from + // last "case" BB, unless the latter was skipped. + CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(), + BTB.Default->getBasicBlock()}; + addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent); + if (!BTB.ContiguousRange) { + addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB); + } + } + SL->BitTestCases.clear(); + for (auto &JTCase : SL->JTCases) { // Emit header first, if it wasn't already emitted. 
if (!JTCase.first.Emitted) @@ -2993,10 +2993,10 @@ void IRTranslator::finalizeBasicBlock() { emitJumpTable(JTCase.second, JTCase.second.MBB); } SL->JTCases.clear(); - - for (auto &SwCase : SL->SwitchCases) - emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); - SL->SwitchCases.clear(); + + for (auto &SwCase : SL->SwitchCases) + emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); + SL->SwitchCases.clear(); } void IRTranslator::finalizeFunction() { @@ -3058,24 +3058,24 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); ORE = std::make_unique<OptimizationRemarkEmitter>(&F); - const TargetMachine &TM = MF->getTarget(); - TM.resetTargetOptions(F); - EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); + const TargetMachine &TM = MF->getTarget(); + TM.resetTargetOptions(F); + EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - if (EnableOpts) - FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); - else - FuncInfo.BPI = nullptr; - - FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); - + if (EnableOpts) + FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + else + FuncInfo.BPI = nullptr; + + FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); - + SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); - + assert(PendingPHIs.empty() && "stale PHIs"); if (!DL->isLittleEndian()) { @@ -3142,7 +3142,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { + if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index bb4d41cfd6..8bdf9f8862 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -562,11 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm( } unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); - if (OpInfo.Regs.front().isVirtual()) { - // Put the register class of the virtual registers in the flag word. - const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); - Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); - } + if (OpInfo.Regs.front().isVirtual()) { + // Put the register class of the virtual registers in the flag word. + const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } Inst.addImm(Flag); if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) return false; @@ -662,7 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint( default: return false; case 'i': // Simple Integer or Relocatable Constant - case 'n': // immediate integer with a known value. + case 'n': // immediate integer with a known value. 
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { assert(CI->getBitWidth() <= 64 && "expected immediate to fit into 64-bits"); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 25fae54871..bbd09edaf1 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -41,7 +41,7 @@ static cl::opt<std::string> cl::desc("Record GlobalISel rule coverage files of this " "prefix if instrumentation was generated")); #else -static const std::string CoveragePrefix; +static const std::string CoveragePrefix; #endif char InstructionSelect::ID = 0; diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 4fec9e628d..1f39b5bf2c 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -38,7 +38,7 @@ bool InstructionSelector::isOperandImmEqual( const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) - return VRegVal->Value.getSExtValue() == Value; + return VRegVal->Value.getSExtValue() == Value; return false; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 1993f60332..5d2979e053 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// -// Enable optimizations to work around MSVC debug mode bug in 32-bit: -// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html -// FIXME: Remove this when the issue is closed. -#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) -// We have to disable runtime checks in order to enable optimizations. This is -// done for the entire file because the problem is actually observed in STL -// template functions. -#pragma runtime_checks("", off) -#pragma optimize("gs", on) -#endif - +// Enable optimizations to work around MSVC debug mode bug in 32-bit: +// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html +// FIXME: Remove this when the issue is closed. +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) +// We have to disable runtime checks in order to enable optimizations. This is +// done for the entire file because the problem is actually observed in STL +// template functions. 
+#pragma runtime_checks("", off) +#pragma optimize("gs", on) +#endif + #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" using namespace llvm; @@ -35,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx, std::initializer_list<LLT> TypesInit) { SmallVector<LLT, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { - return llvm::is_contained(Types, Query.Types[TypeIdx]); + return llvm::is_contained(Types, Query.Types[TypeIdx]); }; } @@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet( SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]}; - return llvm::is_contained(Types, Match); + return llvm::is_contained(Types, Match); }; } @@ -57,10 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet( TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.MMODescrs[MMOIdx].SizeInBits, Query.MMODescrs[MMOIdx].AlignInBits}; - return llvm::any_of(TypesAndMemDesc, - [=](const TypePairAndMemDesc &Entry) -> bool { - return Match.isCompatible(Entry); - }); + return llvm::any_of(TypesAndMemDesc, + [=](const TypePairAndMemDesc &Entry) -> bool { + return Match.isCompatible(Entry); + }); }; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index f3ba3f0801..a5169a9239 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -43,16 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, }; } -LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, - unsigned FromTypeIdx) { - return [=](const LegalityQuery &Query) { - const LLT OldTy = Query.Types[TypeIdx]; - const LLT NewTy = Query.Types[FromTypeIdx]; - const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); - return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); - }; -} - +LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); + return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); + }; +} + LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp index 5ba9367cac..c0629d955d 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, WrapperObserver)) { WorkListObserver.printNewInstrs(); for (auto *DeadMI : DeadInstructions) { - LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); + LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 66871ca3b9..995abb85d0 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ 
b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -30,7 +30,7 @@ using namespace llvm; using namespace LegalizeActions; -using namespace MIPatternMatch; +using namespace MIPatternMatch; /// Try to break down \p OrigTy into \p NarrowTy sized pieces. /// @@ -77,8 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { return Type::getFloatTy(Ctx); case 64: return Type::getDoubleTy(Ctx); - case 80: - return Type::getX86_FP80Ty(Ctx); + case 80: + return Type::getX86_FP80Ty(Ctx); case 128: return Type::getFP128Ty(Ctx); default: @@ -90,15 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), - LI(*MF.getSubtarget().getLegalizerInfo()), - TLI(*MF.getSubtarget().getTargetLowering()) { } + LI(*MF.getSubtarget().getLegalizerInfo()), + TLI(*MF.getSubtarget().getTargetLowering()) { } LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) - : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), - TLI(*MF.getSubtarget().getTargetLowering()) { } - + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), + TLI(*MF.getSubtarget().getTargetLowering()) { } + LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); @@ -240,20 +240,20 @@ void LegalizerHelper::insertParts(Register DstReg, } } -/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. +/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
static void getUnmergeResults(SmallVectorImpl<Register> &Regs, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); - const int StartIdx = Regs.size(); + const int StartIdx = Regs.size(); const int NumResults = MI.getNumOperands() - 1; - Regs.resize(Regs.size() + NumResults); + Regs.resize(Regs.size() + NumResults); for (int I = 0; I != NumResults; ++I) - Regs[StartIdx + I] = MI.getOperand(I).getReg(); + Regs[StartIdx + I] = MI.getOperand(I).getReg(); } -void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, - LLT GCDTy, Register SrcReg) { +void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, + LLT GCDTy, Register SrcReg) { LLT SrcTy = MRI.getType(SrcReg); if (SrcTy == GCDTy) { // If the source already evenly divides the result type, we don't need to do @@ -264,13 +264,13 @@ void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); getUnmergeResults(Parts, *Unmerge); } -} +} -LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, - LLT NarrowTy, Register SrcReg) { - LLT SrcTy = MRI.getType(SrcReg); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - extractGCDType(Parts, GCDTy, SrcReg); +LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, + LLT NarrowTy, Register SrcReg) { + LLT SrcTy = MRI.getType(SrcReg); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + extractGCDType(Parts, GCDTy, SrcReg); return GCDTy; } @@ -384,14 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } if (LCMTy.isVector()) { - unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); - SmallVector<Register, 8> UnmergeDefs(NumDefs); - UnmergeDefs[0] = DstReg; - for (unsigned I = 1; I != NumDefs; ++I) - UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); - - MIRBuilder.buildUnmerge(UnmergeDefs, - MIRBuilder.buildMerge(LCMTy, RemergeRegs)); + unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); + SmallVector<Register, 8> UnmergeDefs(NumDefs); + UnmergeDefs[0] = DstReg; + for (unsigned I = 1; I != NumDefs; ++I) + UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); + + MIRBuilder.buildUnmerge(UnmergeDefs, + MIRBuilder.buildMerge(LCMTy, RemergeRegs)); return; } @@ -399,20 +399,20 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { -#define RTLIBCASE_INT(LibcallPrefix) \ - do { \ - switch (Size) { \ - case 32: \ - return RTLIB::LibcallPrefix##32; \ - case 64: \ - return RTLIB::LibcallPrefix##64; \ - case 128: \ - return RTLIB::LibcallPrefix##128; \ - default: \ - llvm_unreachable("unexpected size"); \ - } \ - } while (0) - +#define RTLIBCASE_INT(LibcallPrefix) \ + do { \ + switch (Size) { \ + case 32: \ + return RTLIB::LibcallPrefix##32; \ + case 64: \ + return RTLIB::LibcallPrefix##64; \ + case 128: \ + return RTLIB::LibcallPrefix##128; \ + default: \ + llvm_unreachable("unexpected size"); \ + } \ + } while (0) + #define RTLIBCASE(LibcallPrefix) \ do { \ switch (Size) { \ @@ -420,8 +420,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { return RTLIB::LibcallPrefix##32; \ case 64: \ return RTLIB::LibcallPrefix##64; \ - case 80: \ - return RTLIB::LibcallPrefix##80; \ + case 80: \ + return RTLIB::LibcallPrefix##80; \ case 128: \ return RTLIB::LibcallPrefix##128; \ default: \ @@ -431,15 +431,15 @@ static RTLIB::Libcall 
getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - RTLIBCASE_INT(SDIV_I); + RTLIBCASE_INT(SDIV_I); case TargetOpcode::G_UDIV: - RTLIBCASE_INT(UDIV_I); + RTLIBCASE_INT(UDIV_I); case TargetOpcode::G_SREM: - RTLIBCASE_INT(SREM_I); + RTLIBCASE_INT(SREM_I); case TargetOpcode::G_UREM: - RTLIBCASE_INT(UREM_I); + RTLIBCASE_INT(UREM_I); case TargetOpcode::G_CTLZ_ZERO_UNDEF: - RTLIBCASE_INT(CTLZ_I); + RTLIBCASE_INT(CTLZ_I); case TargetOpcode::G_FADD: RTLIBCASE(ADD_F); case TargetOpcode::G_FSUB: @@ -482,16 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(RINT_F); case TargetOpcode::G_FNEARBYINT: RTLIBCASE(NEARBYINT_F); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - RTLIBCASE(ROUNDEVEN_F); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + RTLIBCASE(ROUNDEVEN_F); } llvm_unreachable("Unknown libcall function"); } /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(const TargetInstrInfo &TII, - MachineInstr &MI) { +static bool isLibCallInTailPosition(const TargetInstrInfo &TII, + MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -566,7 +566,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, SmallVector<CallLowering::ArgInfo, 3> Args; // Add all the args, except for the last which is an imm denoting 'tail'. - for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { + for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { Register Reg = MI.getOperand(i).getReg(); // Need derive an IR type for call lowering. @@ -582,14 +582,14 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); RTLIB::Libcall RTLibcall; - switch (MI.getOpcode()) { - case TargetOpcode::G_MEMCPY: + switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; break; - case TargetOpcode::G_MEMMOVE: - RTLibcall = RTLIB::MEMMOVE; - break; - case TargetOpcode::G_MEMSET: + case TargetOpcode::G_MEMMOVE: + RTLibcall = RTLIB::MEMMOVE; + break; + case TargetOpcode::G_MEMSET: RTLibcall = RTLIB::MEMSET; break; default: @@ -601,8 +601,8 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MIRBuilder.getTII(), MI); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(MIRBuilder.getTII(), MI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -695,11 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FRINT: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); - if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { - LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && 
Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); @@ -748,14 +748,14 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); - MI.eraseFromParent(); - return Result; - } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); + MI.eraseFromParent(); + return Result; } + } MI.eraseFromParent(); return Legalized; @@ -935,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - auto &MMO = **MI.memoperands_begin(); + auto &MMO = **MI.memoperands_begin(); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -959,15 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); - unsigned MemSize = MMO.getSizeInBits(); - - if (MemSize == NarrowSize) { + unsigned MemSize = MMO.getSizeInBits(); + + if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - } else if (MemSize < NarrowSize) { + } else if (MemSize < NarrowSize) { MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); - } else if (MemSize > NarrowSize) { - // FIXME: Need to split the load. - return UnableToLegalize; + } else if (MemSize > NarrowSize) { + // FIXME: Need to split the load. + return UnableToLegalize; } if (ZExt) @@ -1063,11 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. 
+ if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs(NumParts); SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); @@ -1248,7 +1248,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRMASK: { if (TypeIdx != 1) return UnableToLegalize; @@ -1257,17 +1257,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: - case TargetOpcode::G_FPTOSI: - return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); - case TargetOpcode::G_FPEXT: - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FPEXT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; + } } Register LegalizerHelper::coerceToScalar(Register Val) { @@ -1328,7 +1328,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, @@ -1496,40 +1496,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } -Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { - Register WideReg = MRI.createGenericVirtualRegister(WideTy); - LLT OrigTy = MRI.getType(OrigReg); - LLT LCMTy = getLCMType(WideTy, OrigTy); - - const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); - const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); - - Register UnmergeSrc = WideReg; - - // Create a merge to the LCM type, padding with undef - // %0:_(<3 x s32>) = G_FOO => <4 x s32> - // => - // %1:_(<4 x s32>) = G_FOO - // %2:_(<4 x s32>) = G_IMPLICIT_DEF - // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 - // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 - if (NumMergeParts > 1) { - Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); - SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); - MergeParts[0] = WideReg; - UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); - } - - // Unmerge to the original register and pad with dead defs. 
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); - UnmergeResults[0] = OrigReg; - for (int I = 1; I != NumUnmergeParts; ++I) - UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); - - MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); - return WideReg; -} - +Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { + Register WideReg = MRI.createGenericVirtualRegister(WideTy); + LLT OrigTy = MRI.getType(OrigReg); + LLT LCMTy = getLCMType(WideTy, OrigTy); + + const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); + const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); + + Register UnmergeSrc = WideReg; + + // Create a merge to the LCM type, padding with undef + // %0:_(<3 x s32>) = G_FOO => <4 x s32> + // => + // %1:_(<4 x s32>) = G_FOO + // %2:_(<4 x s32>) = G_IMPLICIT_DEF + // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 + // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 + if (NumMergeParts > 1) { + Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); + SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); + MergeParts[0] = WideReg; + UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); + } + + // Unmerge to the original register and pad with dead defs. + SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); + UnmergeResults[0] = OrigReg; + for (int I = 1; I != NumUnmergeParts; ++I) + UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); + + MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); + return WideReg; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1599,60 +1599,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); - // Create a sequence of unmerges and merges to the original results. Since we - // may have widened the source, we will need to pad the results with dead defs - // to cover the source register. - // e.g. widen s48 to s64: - // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) + // Create a sequence of unmerges and merges to the original results. Since we + // may have widened the source, we will need to pad the results with dead defs + // to cover the source register. + // e.g. 
widen s48 to s64: + // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) // // => - // %4:_(s192) = G_ANYEXT %0:_(s96) - // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge - // ; unpack to GCD type, with extra dead defs - // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) - // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) - // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) - // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination - // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination - const LLT GCDTy = getGCDType(WideTy, DstTy); + // %4:_(s192) = G_ANYEXT %0:_(s96) + // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge + // ; unpack to GCD type, with extra dead defs + // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) + // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) + // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) + // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination + // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination + const LLT GCDTy = getGCDType(WideTy, DstTy); const int NumUnmerge = Unmerge->getNumOperands() - 1; - const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); - - // Directly unmerge to the destination without going through a GCD type - // if possible - if (PartsPerRemerge == 1) { - const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); - - for (int I = 0; I != NumUnmerge; ++I) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - - for (int J = 0; J != PartsPerUnmerge; ++J) { - int Idx = I * PartsPerUnmerge + J; - if (Idx < NumDst) - MIB.addDef(MI.getOperand(Idx).getReg()); - else { - // Create dead def for excess components. - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); - } + const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); + + // Directly unmerge to the destination without going through a GCD type + // if possible + if (PartsPerRemerge == 1) { + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. 
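The widen-s48-to-s64 example above can likewise be checked numerically. The following standalone sketch (plain unsigned sizes stand in for LLT; nothing here touches MIR) computes the anyext size, the number of wide unmerge pieces, and the GCD parts used to re-merge each s48 destination.

#include <cstdio>
#include <numeric>   // std::gcd, std::lcm

int main() {
  // "%1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)" widened with WideTy = s64.
  const unsigned DstBits = 48, WideBits = 64, SrcBits = 96;
  const unsigned AnyExtBits = std::lcm(SrcBits, WideBits);  // 192: size of the G_ANYEXT
  const unsigned NumUnmerge = AnyExtBits / WideBits;        // 3 x s64 wide pieces
  const unsigned GCDBits = std::gcd(WideBits, DstBits);     // 16: common piece type
  const unsigned PartsPerRemerge = DstBits / GCDBits;       // 3 x s16 re-merged per s48
  const unsigned PartsPerWide = WideBits / GCDBits;         // 4 x s16 unpacked per s64
  std::printf("anyext=%u wide=%u gcd=%u remerge=%u perwide=%u\n",
              AnyExtBits, NumUnmerge, GCDBits, PartsPerRemerge, PartsPerWide);
  // PartsPerRemerge != 1, so the GCD path is taken; with s32 results it would be
  // 1 and the code could unmerge straight to the destinations.
  return 0;
}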
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } } - - MIB.addUse(Unmerge.getReg(I)); - } - } else { - SmallVector<Register, 16> Parts; - for (int J = 0; J != NumUnmerge; ++J) - extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); - - SmallVector<Register, 8> RemergeParts; - for (int I = 0; I != NumDst; ++I) { - for (int J = 0; J < PartsPerRemerge; ++J) { - const int Idx = I * PartsPerRemerge + J; - RemergeParts.emplace_back(Parts[Idx]); - } - - MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); - RemergeParts.clear(); + + MIB.addUse(Unmerge.getReg(I)); } + } else { + SmallVector<Register, 16> Parts; + for (int J = 0; J != NumUnmerge; ++J) + extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); + + SmallVector<Register, 8> RemergeParts; + for (int I = 0; I != NumDst; ++I) { + for (int J = 0; J < PartsPerRemerge; ++J) { + const int Idx = I * PartsPerRemerge + J; + RemergeParts.emplace_back(Parts[Idx]); + } + + MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + RemergeParts.clear(); + } } MI.eraseFromParent(); @@ -1702,7 +1702,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { Src = MIRBuilder.buildAnyExt(WideTy, Src); ShiftTy = WideTy; - } + } auto LShr = MIRBuilder.buildLShr( ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); @@ -1740,7 +1740,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx != 0 || WideTy.isVector()) + if (TypeIdx != 0 || WideTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); @@ -1750,45 +1750,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - unsigned Op = MI.getOpcode(); - unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO - ? TargetOpcode::G_ADD - : TargetOpcode::G_SUB; - unsigned ExtOpcode = - Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO - ? TargetOpcode::G_ZEXT - : TargetOpcode::G_SEXT; - auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); - auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); - // Do the arithmetic in the larger type. - auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); - LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); - auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); - auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); - // There is no overflow if the ExtOp is the same as NewOp. - MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); - // Now trunc the NewOp to the original result. - MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 1) + return UnableToLegalize; // TODO + unsigned Op = MI.getOpcode(); + unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO + ? 
TargetOpcode::G_ADD + : TargetOpcode::G_SUB; + unsigned ExtOpcode = + Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO + ? TargetOpcode::G_ZEXT + : TargetOpcode::G_SEXT; + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); + auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); + // Do the arithmetic in the larger type. + auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); + LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); + auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); + auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); + // There is no overflow if the ExtOp is the same as NewOp. + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); + // Now trunc the NewOp to the original result. + MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || - MI.getOpcode() == TargetOpcode::G_SSUBSAT || - MI.getOpcode() == TargetOpcode::G_SSHLSAT; - bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT; + MI.getOpcode() == TargetOpcode::G_SSUBSAT || + MI.getOpcode() == TargetOpcode::G_SSHLSAT; + bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT; // We can convert this to: // 1. Any extend iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB|SHL]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // // It may be more efficient to lower this to a min and a max operation in @@ -1799,14 +1799,14 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, unsigned NewBits = WideTy.getScalarSizeInBits(); unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); - // Shifts must zero-extend the RHS to preserve the unsigned quantity, and - // must not left shift the RHS to preserve the shift amount. + // Shifts must zero-extend the RHS to preserve the unsigned quantity, and + // must not left shift the RHS to preserve the shift amount. auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); - auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) - : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) + : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); - auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); + auto ShiftR = IsShift ? 
RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {ShiftL, ShiftR}, MI.getFlags()); @@ -1834,18 +1834,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return widenScalarMergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UNMERGE_VALUES: return widenScalarUnmergeValues(MI, TypeIdx, WideTy); - case TargetOpcode::G_SADDO: - case TargetOpcode::G_SSUBO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: case TargetOpcode::G_UADDO: - case TargetOpcode::G_USUBO: - return widenScalarAddoSubo(MI, TypeIdx, WideTy); + case TargetOpcode::G_USUBO: + return widenScalarAddoSubo(MI, TypeIdx, WideTy); case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: - case TargetOpcode::G_SSHLSAT: + case TargetOpcode::G_SSHLSAT: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: - case TargetOpcode::G_USHLSAT: - return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); + case TargetOpcode::G_USHLSAT: + return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -2038,22 +2038,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_SITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_UITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_LOAD: @@ -2069,7 +2069,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!Ty.isScalar()) + if (!Ty.isScalar()) return UnableToLegalize; Observer.changingInstr(MI); @@ -2267,7 +2267,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -2277,15 +2277,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_FPOWI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPOWI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; @@ -2312,7 +2312,7 @@ 
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Avoid changing the result vector type if the source element type was // requested. if (TypeIdx == 1) { - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); } else { widenScalarDst(MI, WideTy, 0); } @@ -2415,377 +2415,377 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { return UnableToLegalize; } -/// Figure out the bit offset into a register when coercing a vector index for -/// the wide element type. This is only for the case when promoting vector to -/// one with larger elements. -// -/// -/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) -/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) -static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, - Register Idx, - unsigned NewEltSize, - unsigned OldEltSize) { - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - LLT IdxTy = B.getMRI()->getType(Idx); - - // Now figure out the amount we need to shift to get the target bits. - auto OffsetMask = B.buildConstant( - IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); - auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); - return B.buildShl(IdxTy, OffsetIdx, - B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); -} - -/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this -/// is casting to a vector with a smaller element size, perform multiple element -/// extracts and merge the results. If this is coercing to a vector with larger -/// elements, index the bitcasted vector and extract the target element with bit -/// operations. This is intended to force the indexing in the native register -/// size for architectures that can dynamically index the register file. -LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 1) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Idx = MI.getOperand(2).getReg(); - LLT SrcVecTy = MRI.getType(SrcVec); - LLT IdxTy = MRI.getType(Idx); - - LLT SrcEltTy = SrcVecTy.getElementType(); - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = SrcVecTy.getNumElements(); - - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = SrcEltTy.getSizeInBits(); - if (NewNumElts > OldNumElts) { - // Decreasing the vector element size - // - // e.g. i64 = extract_vector_elt x:v2i64, y:i32 - // => - // v4i32:castx = bitcast x:v2i64 - // - // i64 = bitcast - // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), - // (i32 (extract_vector_elt castx, (2 * y + 1))) - // - if (NewNumElts % OldNumElts != 0) - return UnableToLegalize; - - // Type of the intermediate result vector. 
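For the element-size-decreasing extract path just described, the reassembly amounts to indexing the narrow view at 2*y and 2*y+1 and concatenating the two halves. A standalone model with plain arrays (little-endian element order and the concrete values are assumptions for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  // i64 = extract_vector_elt x:v2i64, y  ==>  index a v4i32 view at 2*y and 2*y+1.
  const uint64_t Vec[2] = {0x1111222233334444ULL, 0x5555666677778888ULL};
  const unsigned Idx = 1;                        // y: which i64 element to extract
  uint32_t Cast[4];                              // the bitcast v4i32 view
  for (unsigned I = 0; I < 4; ++I)
    Cast[I] = uint32_t(Vec[I / 2] >> (32 * (I % 2)));
  const unsigned NewEltsPerOldElt = 2;           // 64 / 32
  const unsigned Base = Idx * NewEltsPerOldElt;  // scaled base index: 2*y
  const uint64_t Elt = uint64_t(Cast[Base]) | (uint64_t(Cast[Base + 1]) << 32);
  std::printf("0x%016llx\n", (unsigned long long)Elt);   // 0x5555666677778888
  return 0;
}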
- const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; - LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); - - auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); - - SmallVector<Register, 8> NewOps(NewEltsPerOldElt); - auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); - - for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { - auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); - auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); - auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); - NewOps[I] = Elt.getReg(0); - } - - auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); - MIRBuilder.buildBitcast(Dst, NewVec); - MI.eraseFromParent(); - return Legalized; - } - - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - // Increasing the vector element size. - // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx - // - // => - // - // %cast = G_BITCAST %vec - // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) - // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx - // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) - // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) - // %elt_bits = G_LSHR %wide_elt, %offset_bits - // %elt = G_TRUNC %elt_bits - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register WideElt = CastVec; - if (CastTy.isVector()) { - WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. - Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - // Shift the wide element to get the target element. - auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); - MIRBuilder.buildTrunc(Dst, ExtractedBits); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p -/// TargetReg, while preserving other bits in \p TargetReg. 
-/// -/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) -static Register buildBitFieldInsert(MachineIRBuilder &B, - Register TargetReg, Register InsertReg, - Register OffsetBits) { - LLT TargetTy = B.getMRI()->getType(TargetReg); - LLT InsertTy = B.getMRI()->getType(InsertReg); - auto ZextVal = B.buildZExt(TargetTy, InsertReg); - auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); - - // Produce a bitmask of the value to insert - auto EltMask = B.buildConstant( - TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), - InsertTy.getSizeInBits())); - // Shift it into position - auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); - auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); - - // Clear out the bits in the wide element - auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); - - // The value to insert has all zeros already, so stick it into the masked - // wide element. - return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); -} - -/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this -/// is increasing the element size, perform the indexing in the target element -/// type, and use bit operations to insert at the element position. This is -/// intended for architectures that can dynamically index the register file and -/// want to force indexing in the native register size. -LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Val = MI.getOperand(2).getReg(); - Register Idx = MI.getOperand(3).getReg(); - - LLT VecTy = MRI.getType(Dst); - LLT IdxTy = MRI.getType(Idx); - - LLT VecEltTy = VecTy.getElementType(); - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = VecEltTy.getSizeInBits(); - - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = VecTy.getNumElements(); - - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register ExtractedElt = CastVec; - if (CastTy.isVector()) { - ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. 
- Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, - Val, OffsetBits); - if (CastTy.isVector()) { - InsertedElt = MIRBuilder.buildInsertVectorElement( - CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); - } - - MIRBuilder.buildBitcast(Dst, InsertedElt); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerLoad(MachineInstr &MI) { - // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); - - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); - return Legalized; - } - - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case 
TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; - } - - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStore(MachineInstr &MI) { - // Lower a non-power of 2 store into multiple pow-2 stores. - // E.g. split an i24 store into an i16 store + i8 store. - // We do this by first extending the stored value to the next largest power - // of 2 type, and then using truncating stores to store the components. - // By doing this, likewise with G_LOAD, generate an extend that can be - // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; - if (SrcTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Extend to the next pow-2. - const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - - // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - - // Generate the PtrAdd and truncating stores. - LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = - MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); - return Legalized; -} - +/// Figure out the bit offset into a register when coercing a vector index for +/// the wide element type. This is only for the case when promoting vector to +/// one with larger elements. +// +/// +/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) +/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) +static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, + Register Idx, + unsigned NewEltSize, + unsigned OldEltSize) { + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + LLT IdxTy = B.getMRI()->getType(Idx); + + // Now figure out the amount we need to shift to get the target bits. + auto OffsetMask = B.buildConstant( + IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); + return B.buildShl(IdxTy, OffsetIdx, + B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); +} + +/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this +/// is casting to a vector with a smaller element size, perform multiple element +/// extracts and merge the results. 
If this is coercing to a vector with larger +/// elements, index the bitcasted vector and extract the target element with bit +/// operations. This is intended to force the indexing in the native register +/// size for architectures that can dynamically index the register file. LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT SrcVecTy = MRI.getType(SrcVec); + LLT IdxTy = MRI.getType(Idx); + + LLT SrcEltTy = SrcVecTy.getElementType(); + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = SrcVecTy.getNumElements(); + + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = SrcEltTy.getSizeInBits(); + if (NewNumElts > OldNumElts) { + // Decreasing the vector element size + // + // e.g. i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + if (NewNumElts % OldNumElts != 0) + return UnableToLegalize; + + // Type of the intermediate result vector. + const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; + LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + + auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); + + SmallVector<Register, 8> NewOps(NewEltsPerOldElt); + auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); + + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); + auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); + auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); + NewOps[I] = Elt.getReg(0); + } + + auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); + MIRBuilder.buildBitcast(Dst, NewVec); + MI.eraseFromParent(); + return Legalized; + } + + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + // Increasing the vector element size. + // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx + // + // => + // + // %cast = G_BITCAST %vec + // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) + // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx + // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) + // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) + // %elt_bits = G_LSHR %wide_elt, %offset_bits + // %elt = G_TRUNC %elt_bits + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. 
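The scaled-index/bit-offset sequence in the comment above can be modelled with ordinary shifts and masks. The sketch below is standalone C++ over plain integers (an s8 element pulled out of an s32 lane; the little-endian layout and concrete sizes are assumptions for illustration, not the builder calls themselves):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Cast[2] = {0x44332211u, 0x88776655u};   // v8s8 viewed as two s32 lanes
  const unsigned Idx = 6;                                // want the 0x77 byte
  const unsigned Log2Ratio = 2;                          // Log2(32 / 8)
  const unsigned ScaledIdx  = Idx >> Log2Ratio;          // which s32 lane: 1
  const unsigned OffsetIdx  = Idx & ~(~0u << Log2Ratio); // index within the lane: 2
  const unsigned OffsetBits = OffsetIdx << 3;            // shl by Log2(OldEltSize): 16
  const unsigned Elt = (Cast[ScaledIdx] >> OffsetBits) & 0xFFu;   // lshr + trunc
  std::printf("0x%02x\n", Elt);                           // 0x77
  return 0;
}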
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register WideElt = CastVec; + if (CastTy.isVector()) { + WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + // Shift the wide element to get the target element. + auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); + MIRBuilder.buildTrunc(Dst, ExtractedBits); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p +/// TargetReg, while preserving other bits in \p TargetReg. +/// +/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) +static Register buildBitFieldInsert(MachineIRBuilder &B, + Register TargetReg, Register InsertReg, + Register OffsetBits) { + LLT TargetTy = B.getMRI()->getType(TargetReg); + LLT InsertTy = B.getMRI()->getType(InsertReg); + auto ZextVal = B.buildZExt(TargetTy, InsertReg); + auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); + + // Produce a bitmask of the value to insert + auto EltMask = B.buildConstant( + TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), + InsertTy.getSizeInBits())); + // Shift it into position + auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); + auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); + + // Clear out the bits in the wide element + auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); + + // The value to insert has all zeros already, so stick it into the masked + // wide element. + return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); +} + +/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this +/// is increasing the element size, perform the indexing in the target element +/// type, and use bit operations to insert at the element position. This is +/// intended for architectures that can dynamically index the register file and +/// want to force indexing in the native register size. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Val = MI.getOperand(2).getReg(); + Register Idx = MI.getOperand(3).getReg(); + + LLT VecTy = MRI.getType(Dst); + LLT IdxTy = MRI.getType(Idx); + + LLT VecEltTy = VecTy.getElementType(); + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = VecEltTy.getSizeInBits(); + + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = VecTy.getNumElements(); + + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. 
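The corresponding insert path uses the bit-field-insert formula from the doc comment above: clear the target slot with a shifted mask, then OR in the shifted, zero-extended value. A standalone model over plain integers (sizes and values chosen only for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Target = 0x88776655u;   // wide element holding four s8 lanes
  const uint32_t Insert = 0xAA;          // value to insert (already zero-extended)
  const unsigned OffsetBits = 16;        // lane 2, currently the 0x77 byte
  const uint32_t EltMask = 0xFFu;                       // low InsertTy-sized bits
  const uint32_t ShiftedMask = EltMask << OffsetBits;   // mask moved into position
  const uint32_t Cleared = Target & ~ShiftedMask;       // drop the old lane
  const uint32_t Result  = Cleared | (Insert << OffsetBits);
  std::printf("0x%08x\n", (unsigned)Result);             // 0x88aa6655
  return 0;
}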
+ if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. + auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register ExtractedElt = CastVec; + if (CastTy.isVector()) { + ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, + Val, OffsetBits); + if (CastTy.isVector()) { + InsertedElt = MIRBuilder.buildInsertVectorElement( + CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); + } + + MIRBuilder.buildBitcast(Dst, InsertedElt); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerLoad(MachineInstr &MI) { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. 
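The i24 example in the strategy comment above can be reproduced with plain memory operations. The following standalone sketch (little-endian byte order assumed; no MachineMemOperands involved) rebuilds the value from a 16-bit zext-load plus an 8-bit load at offset 2:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const unsigned char Mem[3] = {0x11, 0x22, 0x33};   // an i24 with value 0x332211 in memory
  const unsigned LargeSplitBits = 16;                // PowerOf2Floor(24)
  uint16_t Large;                                    // G_ZEXTLOAD, 2 bytes at ptr
  std::memcpy(&Large, Mem, 2);
  const uint8_t Small = Mem[2];                      // 1-byte load at ptr + 2
  const uint32_t Or = (uint32_t(Small) << LargeSplitBits) | Large;   // shl + or in s32
  const uint32_t Result = Or & 0xFFFFFFu;            // G_TRUNC back to s24
  std::printf("0x%06x\n", (unsigned)Result);          // 0x332211
  return 0;
}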
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStore(MachineInstr &MI) { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the PtrAdd and truncating stores. 
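The store side mirrors this: any-extend, shift out the high part, then issue two truncating stores. A standalone model under the same little-endian assumption:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint32_t Src = 0x332211u;                  // the s24 value, any-extended to s32
  const unsigned LargeSplitBits = 16;              // PowerOf2Floor(24)
  const uint32_t Small = Src >> LargeSplitBits;    // G_LSHR: the remaining 8 bits

  unsigned char Mem[3] = {0, 0, 0};
  const uint16_t Low = uint16_t(Src);              // truncating 16-bit store at ptr
  std::memcpy(Mem, &Low, 2);
  Mem[2] = uint8_t(Small);                         // truncating 8-bit store at ptr + 2

  std::printf("%02x %02x %02x\n", (unsigned)Mem[0], (unsigned)Mem[1],
              (unsigned)Mem[2]);                   // 11 22 33
  return 0;
}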
+ LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { @@ -2833,24 +2833,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: - return bitcastExtractVectorElt(MI, TypeIdx, CastTy); - case TargetOpcode::G_INSERT_VECTOR_ELT: - return bitcastInsertVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return bitcastExtractVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_INSERT_VECTOR_ELT: + return bitcastInsertVectorElt(MI, TypeIdx, CastTy); default: return UnableToLegalize; } } -// Legalize an instruction by changing the opcode in place. -void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { - Observer.changingInstr(MI); - MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); - Observer.changedInstr(MI); -} - +// Legalize an instruction by changing the opcode in place. +void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { + Observer.changingInstr(MI); + MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); + Observer.changedInstr(MI); +} + LegalizerHelper::LegalizeResult -LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { using namespace TargetOpcode; switch(MI.getOpcode()) { @@ -2860,7 +2860,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerBitcast(MI); case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); auto Quot = MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty}, {MI.getOperand(1), MI.getOperand(2)}); @@ -2873,9 +2873,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_SADDO: case TargetOpcode::G_SSUBO: return lowerSADDO_SSUBO(MI); - case TargetOpcode::G_UMULH: - case TargetOpcode::G_SMULH: - return lowerSMULH_UMULH(MI); + case TargetOpcode::G_UMULH: + case TargetOpcode::G_SMULH: + return lowerSMULH_UMULH(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -2884,7 +2884,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { Register Overflow = MI.getOperand(1).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); - LLT Ty = MRI.getType(Res); + LLT Ty = MRI.getType(Res); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO ? 
TargetOpcode::G_SMULH @@ -2914,24 +2914,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case TargetOpcode::G_FNEG: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // TODO: Handle vector types once we are able to // represent them. if (Ty.isVector()) return UnableToLegalize; - auto SignMask = - MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); + auto SignMask = + MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); Register SubByReg = MI.getOperand(1).getReg(); - MIRBuilder.buildXor(Res, SubByReg, SignMask); + MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FSUB: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). // First, check if G_FNEG is marked as Lower. If so, we may // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. @@ -2951,12 +2951,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { - // Since round even is the assumed rounding mode for unconstrained FP - // operations, rint and roundeven are the same operation. - changeOpcode(MI, TargetOpcode::G_FRINT); - return Legalized; - } + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + // Since round even is the assumed rounding mode for unconstrained FP + // operations, rint and roundeven are the same operation. 
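The G_FNEG lowering a few hunks above is just an XOR with the IEEE sign bit. A standalone sketch over a 32-bit float (std::memcpy is used here only to view the bit pattern; it is an illustration, not what the builder emits):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const float In = 1.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &In, sizeof(Bits));   // view the float's bit pattern
  Bits ^= 0x80000000u;                     // APInt::getSignMask(32): flip the sign bit
  float Out;
  std::memcpy(&Out, &Bits, sizeof(Out));
  std::printf("%f\n", Out);                // -1.500000
  return 0;
}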
+ changeOpcode(MI, TargetOpcode::G_FRINT); + return Legalized; + } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2971,16 +2971,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return lowerLoad(MI); - case TargetOpcode::G_STORE: - return lowerStore(MI); + case TargetOpcode::G_ZEXTLOAD: + return lowerLoad(MI); + case TargetOpcode::G_STORE: + return lowerStore(MI); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: - return lowerBitCount(MI); + return lowerBitCount(MI); case G_UADDO: { Register Res = MI.getOperand(0).getReg(); Register CarryOut = MI.getOperand(1).getReg(); @@ -3042,24 +3042,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_UITOFP: - return lowerUITOFP(MI); + return lowerUITOFP(MI); case G_SITOFP: - return lowerSITOFP(MI); + return lowerSITOFP(MI); case G_FPTOUI: - return lowerFPTOUI(MI); + return lowerFPTOUI(MI); case G_FPTOSI: return lowerFPTOSI(MI); case G_FPTRUNC: - return lowerFPTRUNC(MI); - case G_FPOWI: - return lowerFPOWI(MI); + return lowerFPTRUNC(MI); + case G_FPOWI: + return lowerFPOWI(MI); case G_SMIN: case G_SMAX: case G_UMIN: case G_UMAX: - return lowerMinMax(MI); + return lowerMinMax(MI); case G_FCOPYSIGN: - return lowerFCopySign(MI); + return lowerFCopySign(MI); case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); @@ -3082,9 +3082,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.eraseFromParent(); return Legalized; } - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return lowerExtractInsertVectorElt(MI); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return lowerExtractInsertVectorElt(MI); case G_SHUFFLE_VECTOR: return lowerShuffleVector(MI); case G_DYN_STACKALLOC: @@ -3100,123 +3100,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_READ_REGISTER: case G_WRITE_REGISTER: return lowerReadWriteRegister(MI); - case G_UADDSAT: - case G_USUBSAT: { - // Try to make a reasonable guess about which lowering strategy to use. The - // target can override this with custom lowering and calling the - // implementation functions. - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SADDSAT: - case G_SSUBSAT: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - - // FIXME: It would probably make more sense to see if G_SADDO is preferred, - // since it's a shorter expansion. However, we would need to figure out the - // preferred boolean type for the carry out for the query. 
- if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SSHLSAT: - case G_USHLSAT: - return lowerShlSat(MI); - case G_ABS: { - // Expand %res = G_ABS %a into: - // %v1 = G_ASHR %a, scalar_size-1 - // %v2 = G_ADD %a, %v1 - // %res = G_XOR %v2, %v1 - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - Register OpReg = MI.getOperand(1).getReg(); - auto ShiftAmt = - MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); - auto Shift = - MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); - auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); - MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); - MI.eraseFromParent(); - return Legalized; - } - case G_SELECT: - return lowerSelect(MI); - } -} - -Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, - Align MinAlign) const { - // FIXME: We're missing a way to go back from LLT to llvm::Type to query the - // datalayout for the preferred alignment. Also there should be a target hook - // for this to allow targets to reduce the alignment and ignore the - // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of - // the type. - return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); -} - -MachineInstrBuilder -LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, - MachinePointerInfo &PtrInfo) { - MachineFunction &MF = MIRBuilder.getMF(); - const DataLayout &DL = MIRBuilder.getDataLayout(); - int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); - - unsigned AddrSpace = DL.getAllocaAddrSpace(); - LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); - - PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); - return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); -} - -static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, - LLT VecTy) { - int64_t IdxVal; - if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) - return IdxReg; - - LLT IdxTy = B.getMRI()->getType(IdxReg); - unsigned NElts = VecTy.getNumElements(); - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); - return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); - } - - return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) - .getReg(0); -} - -Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, - Register Index) { - LLT EltTy = VecTy.getElementType(); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltTy.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); - - LLT IdxTy = MRI.getType(Index); - auto Mul = MIRBuilder.buildMul(IdxTy, Index, - MIRBuilder.buildConstant(IdxTy, EltSize)); - - LLT PtrTy = MRI.getType(VecPtr); - return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); + case G_UADDSAT: + case G_USUBSAT: { + // Try to make a reasonable guess about which lowering strategy to use. The + // target can override this with custom lowering and calling the + // implementation functions. 
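For G_UADDSAT the two strategies mentioned above boil down to a clamp via unsigned min or a wrap-around check in the style of G_UADDO. The sketch below shows both for 32 bits; the helper names and exact formulas are a textbook illustration, not a transcript of lowerAddSubSatToMinMax / lowerAddSubSatToAddoSubo.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Clamp via unsigned min: a + min(b, ~a), where ~a == UINT32_MAX - a.
static uint32_t UAddSatViaMin(uint32_t A, uint32_t B) {
  return A + std::min(B, ~A);
}

// Clamp via an overflow check, in the spirit of the G_UADDO-based expansion.
static uint32_t UAddSatViaOverflow(uint32_t A, uint32_t B) {
  const uint32_t Sum = A + B;              // wraps on overflow
  return Sum < A ? UINT32_MAX : Sum;       // carry out => saturate
}

int main() {
  std::printf("%u %u\n", UAddSatViaMin(0xFFFFFFF0u, 0x100u),
              UAddSatViaOverflow(0xFFFFFFF0u, 0x100u));   // both 4294967295
  return 0;
}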
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (LI.isLegalOrCustom({G_UMIN, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SADDSAT: + case G_SSUBSAT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + // FIXME: It would probably make more sense to see if G_SADDO is preferred, + // since it's a shorter expansion. However, we would need to figure out the + // preferred boolean type for the carry out for the query. + if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SSHLSAT: + case G_USHLSAT: + return lowerShlSat(MI); + case G_ABS: { + // Expand %res = G_ABS %a into: + // %v1 = G_ASHR %a, scalar_size-1 + // %v2 = G_ADD %a, %v1 + // %res = G_XOR %v2, %v1 + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register OpReg = MI.getOperand(1).getReg(); + auto ShiftAmt = + MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); + auto Shift = + MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); + auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); + MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); + MI.eraseFromParent(); + return Legalized; + } + case G_SELECT: + return lowerSelect(MI); + } } +Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, + Align MinAlign) const { + // FIXME: We're missing a way to go back from LLT to llvm::Type to query the + // datalayout for the preferred alignment. Also there should be a target hook + // for this to allow targets to reduce the alignment and ignore the + // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of + // the type. + return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); +} + +MachineInstrBuilder +LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, + MachinePointerInfo &PtrInfo) { + MachineFunction &MF = MIRBuilder.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); + + unsigned AddrSpace = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + + PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); + return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); +} + +static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, + LLT VecTy) { + int64_t IdxVal; + if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) + return IdxReg; + + LLT IdxTy = B.getMRI()->getType(IdxReg); + unsigned NElts = VecTy.getNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); + return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); + } + + return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) + .getReg(0); +} + +Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, + Register Index) { + LLT EltTy = VecTy.getElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. 
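clampDynamicVectorIndex and getVectorElementPointer above reduce to: mask the index when the element count is a power of two, otherwise umin it against the last valid index, then scale by the element's byte size. A standalone sketch (the helper name and the sizes are illustrative only):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative helper (not an LLVM API): clamp a dynamic index and convert it
// to a byte offset to add to the vector's base pointer.
static uint64_t ElementByteOffset(uint64_t Idx, unsigned NElts, unsigned EltBytes) {
  const bool IsPow2 = (NElts & (NElts - 1)) == 0;
  const uint64_t Clamped = IsPow2 ? (Idx & (NElts - 1))                 // AND with low-bit mask
                                  : std::min<uint64_t>(Idx, NElts - 1); // G_UMIN fallback
  return Clamped * EltBytes;                                            // fed into G_PTR_ADD
}

int main() {
  std::printf("%llu %llu\n",
              (unsigned long long)ElementByteOffset(9, 8, 4),    // 9 & 7 = 1  -> 4 bytes
              (unsigned long long)ElementByteOffset(9, 6, 4));   // min(9,5) = 5 -> 20 bytes
  return 0;
}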
+ assert(EltSize * 8 == EltTy.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); + + LLT IdxTy = MRI.getType(Index); + auto Mul = MIRBuilder.buildMul(IdxTy, Index, + MIRBuilder.buildConstant(IdxTy, EltSize)); + + LLT PtrTy = MRI.getType(VecPtr); + return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); +} + LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); + LLT DstTy = MRI.getType(DstReg); + LLT LCMTy = getLCMType(DstTy, NarrowTy); - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); + auto NewUndef = MIRBuilder.buildUndef(NarrowTy); + SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); MI.eraseFromParent(); return Legalized; } @@ -3337,7 +3337,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); + NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); @@ -3610,116 +3610,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. -// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 +// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces +// a vector +// +// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with +// undef as necessary. +// +// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 +// -> <2 x s16> +// +// %4:_(s16) = G_IMPLICIT_DEF +// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 +// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 +// %7:_(<2 x s16>) = G_IMPLICIT_DEF +// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 +// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { +LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - - // Break into a common type - SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); - - // Build the requested new merge, padding with undef. 
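The <3 x s16> over <2 x s16> example above works out numerically as follows; std::lcm stands in for getLCMType and no MIR is built:

#include <cstdio>
#include <numeric>   // std::lcm

int main() {
  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2, split over <2 x s16> pieces.
  const unsigned DstBits = 3 * 16, NarrowBits = 2 * 16;
  const unsigned LCMBits = std::lcm(DstBits, NarrowBits);   // 96 == <6 x s16>
  const unsigned NarrowPieces   = LCMBits / NarrowBits;     // 3 G_CONCAT_VECTORS inputs
  const unsigned UnmergeResults = LCMBits / DstBits;        // 2 results: 1 live + 1 dead
  const unsigned RealElts = 3, EltsPerPiece = 2;
  const unsigned UndefPadElts = NarrowPieces * EltsPerPiece - RealElts;   // 3 undef s16s
  std::printf("lcm=%u pieces=%u results=%u pad=%u\n",
              LCMBits, NarrowPieces, UnmergeResults, UndefPadElts);
  return 0;
}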
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); - - // Pack into the original result register. - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, - unsigned TypeIdx, - LLT NarrowVecTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; - - assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); - if (IsInsert) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - // TODO: Handle total scalarization case. - if (!NarrowVecTy.isVector()) - return UnableToLegalize; - - LLT VecTy = MRI.getType(SrcVec); - - // If the index is a constant, we can really break this down as you would - // expect, and index into the target size pieces. - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - // Avoid out of bounds indexing the pieces. - if (IdxVal >= VecTy.getNumElements()) { - MIRBuilder.buildUndef(DstReg); - MI.eraseFromParent(); - return Legalized; + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + + // Break into a common type + SmallVector<Register, 16> Parts; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + + // Build the requested new merge, padding with undef. + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, + TargetOpcode::G_ANYEXT); + + // Pack into the original result register. + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowVecTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; + + assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); + if (IsInsert) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + // TODO: Handle total scalarization case. + if (!NarrowVecTy.isVector()) + return UnableToLegalize; + + LLT VecTy = MRI.getType(SrcVec); + + // If the index is a constant, we can really break this down as you would + // expect, and index into the target size pieces. + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + // Avoid out of bounds indexing the pieces. + if (IdxVal >= VecTy.getNumElements()) { + MIRBuilder.buildUndef(DstReg); + MI.eraseFromParent(); + return Legalized; } - SmallVector<Register, 8> VecParts; - LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); - - // Build a sequence of NarrowTy pieces in VecParts for this operand. 
- LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, - TargetOpcode::G_ANYEXT); - - unsigned NewNumElts = NarrowVecTy.getNumElements(); - - LLT IdxTy = MRI.getType(Idx); - int64_t PartIdx = IdxVal / NewNumElts; - auto NewIdx = - MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); - - if (IsInsert) { - LLT PartTy = MRI.getType(VecParts[PartIdx]); - - // Use the adjusted index to insert into one of the subvectors. - auto InsertPart = MIRBuilder.buildInsertVectorElement( - PartTy, VecParts[PartIdx], InsertVal, NewIdx); - VecParts[PartIdx] = InsertPart.getReg(0); - - // Recombine the inserted subvector with the others to reform the result - // vector. - buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); - } else { - MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); - } - - MI.eraseFromParent(); - return Legalized; - } - - // With a variable index, we can't perform the operation in a smaller type, so - // we're forced to expand this. - // - // TODO: We could emit a chain of compare/select to figure out which piece to - // index. - return lowerExtractInsertVectorElt(MI); + SmallVector<Register, 8> VecParts; + LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); + + // Build a sequence of NarrowTy pieces in VecParts for this operand. + LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); + + unsigned NewNumElts = NarrowVecTy.getNumElements(); + + LLT IdxTy = MRI.getType(Idx); + int64_t PartIdx = IdxVal / NewNumElts; + auto NewIdx = + MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); + + if (IsInsert) { + LLT PartTy = MRI.getType(VecParts[PartIdx]); + + // Use the adjusted index to insert into one of the subvectors. + auto InsertPart = MIRBuilder.buildInsertVectorElement( + PartTy, VecParts[PartIdx], InsertVal, NewIdx); + VecParts[PartIdx] = InsertPart.getReg(0); + + // Recombine the inserted subvector with the others to reform the result + // vector. + buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); + } else { + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + } + + MI.eraseFromParent(); + return Legalized; + } + + // With a variable index, we can't perform the operation in a smaller type, so + // we're forced to expand this. + // + // TODO: We could emit a chain of compare/select to figure out which piece to + // index. 
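In the constant-index path above, picking the right narrowed piece is just a divide plus a remainder: PartIdx selects the sub-vector and NewIdx re-indexes within it. A tiny worked sketch with made-up values (an <8 x s32> access at index 5 split into <4 x s32> pieces); the numbers are hypothetical and only demonstrate the arithmetic.

#include <cassert>

int main() {
  const int IdxVal = 5;      // original constant element index
  const int NewNumElts = 4;  // elements per NarrowTy piece
  const int PartIdx = IdxVal / NewNumElts;           // piece 1
  const int NewIdx = IdxVal - NewNumElts * PartIdx;  // element 1 of that piece
  assert(PartIdx == 1 && NewIdx == 1);
  return 0;
}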
+ return lowerExtractInsertVectorElt(MI); } LegalizerHelper::LegalizeResult @@ -3765,8 +3765,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, if (NumParts == -1) return UnableToLegalize; - LLT PtrTy = MRI.getType(AddrReg); - const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT PtrTy = MRI.getType(AddrReg); + const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); @@ -3964,7 +3964,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_ADD: case G_SUB: case G_MUL: - case G_PTR_ADD: + case G_PTR_ADD: case G_SMULH: case G_UMULH: case G_FADD: @@ -3988,7 +3988,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FFLOOR: case G_FRINT: case G_INTRINSIC_ROUND: - case G_INTRINSIC_ROUNDEVEN: + case G_INTRINSIC_ROUNDEVEN: case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: @@ -4020,8 +4020,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SHL: case G_LSHR: case G_ASHR: - case G_SSHLSAT: - case G_USHLSAT: + case G_SSHLSAT: + case G_USHLSAT: case G_CTLZ: case G_CTLZ_ZERO_UNDEF: case G_CTTZ: @@ -4052,15 +4052,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: - assert(TypeIdx == 0 && "not a vector type index"); - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_CONCAT_VECTORS: - if (TypeIdx != 1) // TODO: This probably does work as expected already. - return UnableToLegalize; - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); + assert(TypeIdx == 0 && "not a vector type index"); + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_CONCAT_VECTORS: + if (TypeIdx != 1) // TODO: This probably does work as expected already. + return UnableToLegalize; + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -4484,31 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { } LegalizerHelper::LegalizeResult -LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; - - Register Src = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(Src); - - // If all finite floats fit into the narrowed integer type, we can just swap - // out the result type. This is practically only useful for conversions from - // half to at least 16-bits, so just handle the one case. - if (SrcTy.getScalarType() != LLT::scalar(16) || - NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) - return UnableToLegalize; - - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, - IsSigned ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { if (TypeIdx != 1) @@ -4857,9 +4857,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBitCount(MachineInstr &MI) { +LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); - const auto &TII = MIRBuilder.getTII(); + const auto &TII = MIRBuilder.getTII(); auto isSupported = [this](const LegalityQuery &Q) { auto QAction = LI.getAction(Q).Action; return QAction == Legal || QAction == Libcall || QAction == Custom; @@ -4947,15 +4947,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); - auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); + auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); + auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); auto MIBTmp = MIRBuilder.buildAnd( - SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); - if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && - isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { - auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); + SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); + if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && + isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { + auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen, - MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); + MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); MI.eraseFromParent(); return Legalized; } @@ -4964,8 +4964,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { return Legalized; } case TargetOpcode::G_CTPOP: { - Register SrcReg = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(SrcReg); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; @@ -4975,11 +4975,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // B2Count = val - { (val >> 1) & 0x55555555 } // since it gives same result in blocks of 2 with one instruction less. 
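For reference, the G_CTPOP expansion being built here follows the familiar Hacker's Delight population count. A 32-bit scalar rendering of the same mask-and-add steps is shown below; it is illustrative only, since the legalizer materializes the splatted masks with APInt::getSplat and emits the matching G_* operations instead.

#include <cstdint>

uint32_t popcount32(uint32_t v) {
  // Count bits in blocks of 2: v - ((v >> 1) & 0x55555555) equals
  // (v & 0x55555555) + ((v >> 1) & 0x55555555) with one instruction less.
  v = v - ((v >> 1) & 0x55555555u);
  // Sum adjacent 2-bit counts into 4-bit counts.
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);
  // Sum adjacent 4-bit counts into 8-bit counts.
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;
  // Add the four byte counts together; the total lands in the top byte.
  return (v * 0x01010101u) >> 24;
}

The subtract form for the first step is exactly the shortcut the comment above calls out.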
auto C_1 = B.buildConstant(Ty, 1); - auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); + auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55)); auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0); auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0); - auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); + auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); // In order to get count in blocks of 4 add values from adjacent block of 2. // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 } @@ -5078,7 +5078,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5106,7 +5106,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5152,7 +5152,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5369,7 +5369,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { +LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -5384,20 +5384,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { return UnableToLegalize; } -// TODO: If RHS is a constant SelectionDAGBuilder expands this into a -// multiplication tree. -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Dst); - - auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); - MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); - MI.eraseFromParent(); - return Legalized; -} - +// TODO: If RHS is a constant SelectionDAGBuilder expands this into a +// multiplication tree. 
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); + MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -5413,7 +5413,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) { } } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5429,7 +5429,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFCopySign(MachineInstr &MI) { +LegalizerHelper::lowerFCopySign(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5651,72 +5651,72 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { return Legalized; } -/// Lower a vector extract or insert by writing the vector to a stack temporary -/// and reloading the element or vector. -/// -/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx -/// => -/// %stack_temp = G_FRAME_INDEX -/// G_STORE %vec, %stack_temp -/// %idx = clamp(%idx, %vec.getNumElements()) -/// %element_ptr = G_PTR_ADD %stack_temp, %idx -/// %dst = G_LOAD %element_ptr -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - LLT VecTy = MRI.getType(SrcVec); - LLT EltTy = VecTy.getElementType(); - if (!EltTy.isByteSized()) { // Not implemented. - LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); - return UnableToLegalize; - } - - unsigned EltBytes = EltTy.getSizeInBytes(); - Align VecAlign = getStackTemporaryAlignment(VecTy); - Align EltAlign; - - MachinePointerInfo PtrInfo; - auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), - VecAlign, PtrInfo); - MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); - - // Get the pointer to the element, and be sure not to hit undefined behavior - // if the index is out of bounds. - Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - int64_t Offset = IdxVal * EltBytes; - PtrInfo = PtrInfo.getWithOffset(Offset); - EltAlign = commonAlignment(VecAlign, Offset); - } else { - // We lose information with a variable offset. - EltAlign = getStackTemporaryAlignment(EltTy); - PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); - } - - if (InsertVal) { - // Write the inserted element - MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); - - // Reload the whole vector. 
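Returning to lowerFPOWI above: the lowering is simply "convert the integer exponent, then defer to G_FPOW". A hedged scalar equivalent is sketched below, with libm's pow standing in for whatever G_FPOW eventually becomes on a given target; the function name is illustrative.

#include <cmath>

double fpowiLowered(double x, int n) {
  // G_SITOFP on the exponent, then G_FPOW.
  return std::pow(x, static_cast<double>(n));
}

As the TODO notes, a constant exponent could instead be expanded into a multiplication tree, which is what SelectionDAGBuilder already does.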
- MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); - } else { - MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); - } - - MI.eraseFromParent(); - return Legalized; -} - +/// Lower a vector extract or insert by writing the vector to a stack temporary +/// and reloading the element or vector. +/// +/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx +/// => +/// %stack_temp = G_FRAME_INDEX +/// G_STORE %vec, %stack_temp +/// %idx = clamp(%idx, %vec.getNumElements()) +/// %element_ptr = G_PTR_ADD %stack_temp, %idx +/// %dst = G_LOAD %element_ptr LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + LLT VecTy = MRI.getType(SrcVec); + LLT EltTy = VecTy.getElementType(); + if (!EltTy.isByteSized()) { // Not implemented. + LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); + return UnableToLegalize; + } + + unsigned EltBytes = EltTy.getSizeInBytes(); + Align VecAlign = getStackTemporaryAlignment(VecTy); + Align EltAlign; + + MachinePointerInfo PtrInfo; + auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), + VecAlign, PtrInfo); + MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); + + // Get the pointer to the element, and be sure not to hit undefined behavior + // if the index is out of bounds. + Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + int64_t Offset = IdxVal * EltBytes; + PtrInfo = PtrInfo.getWithOffset(Offset); + EltAlign = commonAlignment(VecAlign, Offset); + } else { + // We lose information with a variable offset. + EltAlign = getStackTemporaryAlignment(EltTy); + PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); + } + + if (InsertVal) { + // Write the inserted element + MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); + + // Reload the whole vector. 
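The lowerExtractInsertVectorElt fallback above is the classic "spill to a stack slot, index with a pointer, reload" sequence. Below is a host-side analogue under the same restriction the code asserts (byte-sized elements), with a fixed-size local array standing in for the frame index; everything here is illustrative and not the builder API.

#include <algorithm>
#include <cstdint>
#include <cstring>

// Extract: store the vector to the temporary, clamp the index, load one
// element back. Assumes numElts is in (0, 16] to keep the sketch self-contained.
uint32_t extractEltViaStack(const uint32_t *vec, unsigned numElts, unsigned idx) {
  uint32_t stackTemp[16];
  std::memcpy(stackTemp, vec, numElts * sizeof(uint32_t));
  idx = std::min(idx, numElts - 1);  // clamp, as getVectorElementPointer does
  return stackTemp[idx];
}

// Insert: store the vector, overwrite one element, reload the whole vector.
void insertEltViaStack(uint32_t *vec, unsigned numElts, unsigned idx, uint32_t val) {
  uint32_t stackTemp[16];
  std::memcpy(stackTemp, vec, numElts * sizeof(uint32_t));
  idx = std::min(idx, numElts - 1);
  stackTemp[idx] = val;
  std::memcpy(vec, stackTemp, numElts * sizeof(uint32_t));
}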
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); + } else { + MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); Register Src0Reg = MI.getOperand(1).getReg(); @@ -5931,185 +5931,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - bool IsSigned; - bool IsAdd; - unsigned BaseOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - } - - if (IsSigned) { - // sadd.sat(a, b) -> - // hi = 0x7fffffff - smax(a, 0) - // lo = 0x80000000 - smin(a, 0) - // a + smin(smax(lo, b), hi) - // ssub.sat(a, b) -> - // lo = smax(a, -1) - 0x7fffffff - // hi = smin(a, -1) - 0x80000000 - // a - smin(smax(lo, b), hi) - // TODO: AMDGPU can use a "median of 3" instruction here: - // a +/- med3(lo, b, hi) - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto MaxVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - MachineInstrBuilder Hi, Lo; - if (IsAdd) { - auto Zero = MIRBuilder.buildConstant(Ty, 0); - Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); - Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); - } else { - auto NegOne = MIRBuilder.buildConstant(Ty, -1); - Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), - MaxVal); - Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), - MinVal); - } - auto RHSClamped = - MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); - } else { - // uadd.sat(a, b) -> a + umin(~a, b) - // usub.sat(a, b) -> a - umin(a, b) - Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; - auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - bool IsSigned; - bool IsAdd; - unsigned OverflowOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - OverflowOp = TargetOpcode::G_UADDO; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - OverflowOp = TargetOpcode::G_SADDO; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - OverflowOp = TargetOpcode::G_USUBO; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - OverflowOp = TargetOpcode::G_SSUBO; - break; - } - - auto OverflowRes = - MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); - Register Tmp = OverflowRes.getReg(0); - Register Ov = OverflowRes.getReg(1); - MachineInstrBuilder Clamp; - if (IsSigned) { - // sadd.sat(a, b) -> - // {tmp, ov} = saddo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - // ssub.sat(a, b) -> - // {tmp, ov} = ssubo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); - auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); - } else { - // uadd.sat(a, b) -> - // {tmp, ov} = uaddo(a, b) - // ov ? 0xffffffff : tmp - // usub.sat(a, b) -> - // {tmp, ov} = usubo(a, b) - // ov ? 0 : tmp - Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); - } - MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerShlSat(MachineInstr &MI) { - assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT) && - "Expected shlsat opcode!"); - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - - unsigned BW = Ty.getScalarSizeInBits(); - auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); - auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) - : MIRBuilder.buildLShr(Ty, Result, RHS); - - MachineInstrBuilder SatVal; - if (IsSigned) { - auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); - auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); - auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, - MIRBuilder.buildConstant(Ty, 0)); - SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); - } else { - SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); - } - auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); - MIRBuilder.buildSelect(Res, Ov, SatVal, Result); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + bool IsSigned; + bool IsAdd; + unsigned BaseOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + } + + if (IsSigned) { + // sadd.sat(a, b) -> + // hi = 0x7fffffff - smax(a, 0) + // lo = 0x80000000 - smin(a, 0) + // a + smin(smax(lo, b), hi) + // ssub.sat(a, b) -> + // lo = smax(a, -1) - 0x7fffffff + // hi = smin(a, -1) - 0x80000000 + // a - smin(smax(lo, b), hi) + // TODO: AMDGPU can use a "median of 3" instruction here: + // a +/- med3(lo, b, hi) + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto MaxVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + MachineInstrBuilder Hi, Lo; + if (IsAdd) { + auto Zero = MIRBuilder.buildConstant(Ty, 0); + Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); + Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); + } else { + auto NegOne = MIRBuilder.buildConstant(Ty, -1); + Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), + MaxVal); + Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), + MinVal); + } + auto RHSClamped = + MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); + } else { + // uadd.sat(a, b) -> a + umin(~a, b) + // usub.sat(a, b) -> a - umin(a, b) + Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; + auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + bool IsSigned; + bool IsAdd; + unsigned OverflowOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + OverflowOp = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + OverflowOp = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + OverflowOp = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + OverflowOp = TargetOpcode::G_SSUBO; + break; + } + + auto OverflowRes = + MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); + Register Tmp = OverflowRes.getReg(0); + Register Ov = OverflowRes.getReg(1); + MachineInstrBuilder Clamp; + if (IsSigned) { + // sadd.sat(a, b) -> + // {tmp, ov} = saddo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + // ssub.sat(a, b) -> + // {tmp, ov} = ssubo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); + auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); + } else { + // uadd.sat(a, b) -> + // {tmp, ov} = uaddo(a, b) + // ov ? 0xffffffff : tmp + // usub.sat(a, b) -> + // {tmp, ov} = usubo(a, b) + // ov ? 0 : tmp + Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); + } + MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShlSat(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT) && + "Expected shlsat opcode!"); + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + + unsigned BW = Ty.getScalarSizeInBits(); + auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); + auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) + : MIRBuilder.buildLShr(Ty, Result, RHS); + + MachineInstrBuilder SatVal; + if (IsSigned) { + auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); + auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); + auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, + MIRBuilder.buildConstant(Ty, 0)); + SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); + } else { + SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); + } + auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); + MIRBuilder.buildSelect(Res, Ov, SatVal, Result); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -6199,7 +6199,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { const MDString *RegStr = cast<MDString>( cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); - Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); + Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); if (!PhysReg.isValid()) return UnableToLegalize; @@ -6211,63 +6211,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; - unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; - Register Result = MI.getOperand(0).getReg(); - LLT OrigTy = MRI.getType(Result); - auto SizeInBits = OrigTy.getScalarSizeInBits(); - LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); - - auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); - auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); - auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); - unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; - - auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); - auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); - MIRBuilder.buildTrunc(Result, Shifted); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { - // Implement vector G_SELECT in terms of XOR, AND, OR. - Register DstReg = MI.getOperand(0).getReg(); - Register MaskReg = MI.getOperand(1).getReg(); - Register Op1Reg = MI.getOperand(2).getReg(); - Register Op2Reg = MI.getOperand(3).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT MaskTy = MRI.getType(MaskReg); - LLT Op1Ty = MRI.getType(Op1Reg); - if (!DstTy.isVector()) - return UnableToLegalize; - - // Vector selects can have a scalar predicate. If so, splat into a vector and - // finish for later legalization attempts to try again. - if (MaskTy.isScalar()) { - Register MaskElt = MaskReg; - if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) - MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); - // Generate a vector splat idiom to be pattern matched later. 
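Tying the saturating-arithmetic lowerings above together: the unsigned cases reduce to a single unsigned min, exactly as the comments state, with uadd.sat(a, b) = a + umin(~a, b) and usub.sat(a, b) = a - umin(a, b). A 32-bit scalar sketch (function names are illustrative):

#include <algorithm>
#include <cstdint>

uint32_t uaddSat(uint32_t a, uint32_t b) {
  // ~a is exactly the headroom left before a would wrap, so clamping b to it
  // makes the add either exact or land precisely on UINT32_MAX.
  return a + std::min(~a, b);
}

uint32_t usubSat(uint32_t a, uint32_t b) {
  // Never subtract more than a itself; the result floors at 0.
  return a - std::min(a, b);
}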
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(ShufSplat.getReg(0)); - Observer.changedInstr(MI); - return Legalized; - } - - if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { - return UnableToLegalize; - } - - auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); - auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); - auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); - MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); - MI.eraseFromParent(); - return Legalized; -} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; + unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + Register Result = MI.getOperand(0).getReg(); + LLT OrigTy = MRI.getType(Result); + auto SizeInBits = OrigTy.getScalarSizeInBits(); + LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); + + auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); + auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); + auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); + unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); + auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); + MIRBuilder.buildTrunc(Result, Shifted); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { + // Implement vector G_SELECT in terms of XOR, AND, OR. + Register DstReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register Op1Reg = MI.getOperand(2).getReg(); + Register Op2Reg = MI.getOperand(3).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT MaskTy = MRI.getType(MaskReg); + LLT Op1Ty = MRI.getType(Op1Reg); + if (!DstTy.isVector()) + return UnableToLegalize; + + // Vector selects can have a scalar predicate. If so, splat into a vector and + // finish for later legalization attempts to try again. + if (MaskTy.isScalar()) { + Register MaskElt = MaskReg; + if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) + MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); + // Generate a vector splat idiom to be pattern matched later. 
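lowerSMULH_UMULH above widens both operands to twice the width, multiplies, and keeps the high half with an arithmetic or logical shift before truncating. The 32-bit scalar version of that recipe, using 64-bit host arithmetic purely as an illustration:

#include <cstdint>

uint32_t umulh32(uint32_t a, uint32_t b) {
  // G_ZEXT both operands, G_MUL in 64 bits, G_LSHR by 32, G_TRUNC.
  return static_cast<uint32_t>((static_cast<uint64_t>(a) * b) >> 32);
}

int32_t smulh32(int32_t a, int32_t b) {
  // G_SEXT both operands, G_MUL in 64 bits, G_ASHR by 32, G_TRUNC.
  return static_cast<int32_t>((static_cast<int64_t>(a) * b) >> 32);
}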
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + return UnableToLegalize; + } + + auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); + auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); + auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); + MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + MI.eraseFromParent(); + return Legalized; +} diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 30acac14bc..0a5cb26325 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -105,7 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const { static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q, const std::pair<unsigned, LLT> &Mutation) { switch (Rule.getAction()) { - case Legal: + case Legal: case Custom: case Lower: case MoreElements: @@ -123,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule, std::pair<unsigned, LLT> Mutation) { // If the user wants a custom mutation, then we can't really say much about // it. Return true, and trust that they're doing the right thing. - if (Rule.getAction() == Custom || Rule.getAction() == Legal) + if (Rule.getAction() == Custom || Rule.getAction() == Legal) return true; const unsigned TypeIdx = Mutation.first; @@ -148,8 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule, if (NewTy.getNumElements() <= OldElts) return false; } - } else if (Rule.getAction() == MoreElements) - return false; + } else if (Rule.getAction() == MoreElements) + return false; // Make sure the element type didn't change. 
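The vector G_SELECT lowering that finishes just above is the standard mask trick: once every lane of the mask is all ones or all zeros, the select becomes (mask & a) | (~mask & b). A per-lane sketch follows; the lane width and names are illustrative.

#include <cstdint>

// One lane of the lowered select. `mask` must be all ones or all zeros,
// which is why a scalar predicate is sign-extended and splatted first.
uint32_t selectLane(uint32_t mask, uint32_t a, uint32_t b) {
  return (a & mask) | (b & ~mask);  // G_AND, G_AND of the inverted mask, G_OR
}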
return NewTy.getScalarType() == OldTy.getScalarType(); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp index 30c00c63f6..66cff18e91 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp @@ -11,7 +11,7 @@ #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -57,20 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } -bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { - MachineInstr *MI = Op.getParent(); - if (!MI->isPHI()) - return false; - - Register SrcReg = Op.getReg(); - for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { - auto &MO = MI->getOperand(Idx); - if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) - return true; - } - return false; -} - +bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { + MachineInstr *MI = Op.getParent(); + if (!MI->isPHI()) + return false; + + Register SrcReg = Op.getReg(); + for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { + auto &MO = MI->getOperand(Idx); + if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) + return true; + } + return false; +} + bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; @@ -108,14 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedInstrs.insert(&MI); continue; } - - // If the use is a phi operand that's not unique, don't try to localize. - // If we do, we can cause unnecessary instruction bloat by duplicating - // into each predecessor block, when the existing one is sufficient and - // allows for easier optimization later. - if (isNonUniquePhiValue(MOUse)) - continue; - + + // If the use is a phi operand that's not unique, don't try to localize. + // If we do, we can cause unnecessary instruction bloat by duplicating + // into each predecessor block, when the existing one is sufficient and + // allows for easier optimization later. + if (isNonUniquePhiValue(MOUse)) + continue; + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 67ef02a4e7..549bb1a13c 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -9,7 +9,7 @@ /// This file implements the MachineIRBuidler class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) { MIB.addFPImm(CFP); } else { - // Insert $noreg if we didn't find a usable constant and had to drop it. 
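The Localizer change above boils down to a simple question: does the same virtual register already feed another incoming edge of this PHI? Stated as a plain function over a list of incoming registers (the container and index convention are illustrative, not the MachineOperand layout):

#include <cstddef>
#include <vector>

// True if the register at `opIdx` also appears as a different incoming value,
// in which case duplicating its def into every predecessor would only add bloat.
bool isNonUniquePhiValue(const std::vector<unsigned> &incomingRegs,
                         std::size_t opIdx) {
  for (std::size_t i = 0; i < incomingRegs.size(); ++i)
    if (i != opIdx && incomingRegs[i] == incomingRegs[opIdx])
      return true;
  return false;
}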
- MIB.addReg(Register()); + // Insert $noreg if we didn't find a usable constant and had to drop it. + MIB.addReg(Register()); } MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); @@ -162,11 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, .addJumpTableIndex(JTI); } -void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) { - assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); - assert((Res == Op0) && "type mismatch"); -} - +void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) { + assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res == Op0) && "type mismatch"); +} + void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1) { assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); @@ -317,29 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, return buildFConstant(Res, *CFP); } -MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, +MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest) { - assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); + assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); - auto MIB = buildInstr(TargetOpcode::G_BRCOND); - Tst.addSrcToMIB(MIB); - MIB.addMBB(&Dest); - return MIB; + auto MIB = buildInstr(TargetOpcode::G_BRCOND); + Tst.addSrcToMIB(MIB); + MIB.addMBB(&Dest); + return MIB; } -MachineInstrBuilder -MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, - MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags MMOFlags, - const AAMDNodes &AAInfo) { - MMOFlags |= MachineMemOperand::MOLoad; - assert((MMOFlags & MachineMemOperand::MOStore) == 0); - - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); - MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); - return buildLoad(Dst, Addr, *MMO); +MachineInstrBuilder +MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildLoad(Dst, Addr, *MMO); } MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, @@ -386,21 +386,21 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, return MIB; } -MachineInstrBuilder -MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, - MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags MMOFlags, - const AAMDNodes &AAInfo) { - MMOFlags |= MachineMemOperand::MOStore; - assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); - MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); - return buildStore(Val, Addr, *MMO); -} - +MachineInstrBuilder +MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags 
|= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildStore(Val, Addr, *MMO); +} + MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::G_ANYEXT, Res, Op); @@ -635,35 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, - const SrcOp &Src) { - LLT DstTy = Res.getLLTTy(*getMRI()); - assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && - "Expected Src to match Dst elt ty"); - auto UndefVec = buildUndef(DstTy); - auto Zero = buildConstant(LLT::scalar(64), 0); - auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero); - SmallVector<int, 16> ZeroMask(DstTy.getNumElements()); - return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); -} - -MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, - const SrcOp &Src1, - const SrcOp &Src2, - ArrayRef<int> Mask) { - LLT DstTy = Res.getLLTTy(*getMRI()); - LLT Src1Ty = Src1.getLLTTy(*getMRI()); - LLT Src2Ty = Src2.getLLTTy(*getMRI()); - assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); - assert(DstTy.getElementType() == Src1Ty.getElementType() && - DstTy.getElementType() == Src2Ty.getElementType()); - (void)Src1Ty; - (void)Src2Ty; - ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); - return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) - .addShuffleMask(MaskAlloc); -} - +MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, + const SrcOp &Src) { + LLT DstTy = Res.getLLTTy(*getMRI()); + assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + "Expected Src to match Dst elt ty"); + auto UndefVec = buildUndef(DstTy); + auto Zero = buildConstant(LLT::scalar(64), 0); + auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero); + SmallVector<int, 16> ZeroMask(DstTy.getNumElements()); + return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); +} + +MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, + const SrcOp &Src1, + const SrcOp &Src2, + ArrayRef<int> Mask) { + LLT DstTy = Res.getLLTTy(*getMRI()); + LLT Src1Ty = Src1.getLLTTy(*getMRI()); + LLT Src2Ty = Src2.getLLTTy(*getMRI()); + assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); + assert(DstTy.getElementType() == Src1Ty.getElementType() && + DstTy.getElementType() == Src2Ty.getElementType()); + (void)Src1Ty; + (void)Src2Ty; + ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); + return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) + .addShuffleMask(MaskAlloc); +} + MachineInstrBuilder MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) { // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, @@ -986,14 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI())); break; } - case TargetOpcode::G_FNEG: - case TargetOpcode::G_ABS: - // All these are unary ops. 
- assert(DstOps.size() == 1 && "Invalid Dst"); - assert(SrcOps.size() == 1 && "Invalid Srcs"); - validateUnaryOp(DstOps[0].getLLTTy(*getMRI()), - SrcOps[0].getLLTTy(*getMRI())); - break; + case TargetOpcode::G_FNEG: + case TargetOpcode::G_ABS: + // All these are unary ops. + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + validateUnaryOp(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI())); + break; case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1022,9 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, } case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_USHLSAT: - case TargetOpcode::G_SSHLSAT: { + case TargetOpcode::G_LSHR: + case TargetOpcode::G_USHLSAT: + case TargetOpcode::G_SSHLSAT: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); validateShiftOp(DstOps[0].getLLTTy(*getMRI()), @@ -1089,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_UNMERGE_VALUES: { assert(!DstOps.empty() && "Invalid trivial sequence"); assert(SrcOps.size() == 1 && "Invalid src for Unmerge"); - assert(llvm::all_of(DstOps, - [&, this](const DstOp &Op) { - return Op.getLLTTy(*getMRI()) == - DstOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(DstOps, + [&, this](const DstOp &Op) { + return Op.getLLTTy(*getMRI()) == + DstOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in output list"); assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1103,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_MERGE_VALUES: { assert(!SrcOps.empty() && "invalid trivial sequence"); assert(DstOps.size() == 1 && "Invalid Dst"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1154,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1171,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == 
DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits()) @@ -1186,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert((!SrcOps.empty() || SrcOps.size() < 2) && "Must have at least 2 operands"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return (Op.getLLTTy(*getMRI()).isVector() && - Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI())); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return (Op.getLLTTy(*getMRI()).isVector() && + Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI())); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index e2a9637471..05f47915b3 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -421,7 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { // Then the alternative mapping, if any. InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - append_range(PossibleMappings, AltMappings); + append_range(PossibleMappings, AltMappings); #ifndef NDEBUG for (const InstructionMapping *Mapping : PossibleMappings) assert(Mapping->verify(MI) && "Mapping is invalid"); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp index cd24832244..2adc30eacc 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp @@ -11,11 +11,11 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/Optional.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,16 +23,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "globalisel-utils" using namespace llvm; -using namespace MIPatternMatch; +using namespace MIPatternMatch; Register llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -48,7 +48,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, - const TargetRegisterClass &RegClass, MachineOperand &RegMO) { + const TargetRegisterClass &RegClass, MachineOperand &RegMO) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. 
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -69,13 +69,13 @@ Register llvm::constrainOperandRegClass( TII.get(TargetOpcode::COPY), Reg) .addReg(ConstrainedReg); } - if (GISelChangeObserver *Observer = MF.getObserver()) { - Observer->changingInstr(*RegMO.getParent()); - } - RegMO.setReg(ConstrainedReg); - if (GISelChangeObserver *Observer = MF.getObserver()) { - Observer->changedInstr(*RegMO.getParent()); - } + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changingInstr(*RegMO.getParent()); + } + RegMO.setReg(ConstrainedReg); + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changedInstr(*RegMO.getParent()); + } } else { if (GISelChangeObserver *Observer = MF.getObserver()) { if (!RegMO.isDef()) { @@ -93,7 +93,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - MachineOperand &RegMO, unsigned OpIdx) { + MachineOperand &RegMO, unsigned OpIdx) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -163,7 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, // If the operand is a vreg, we should constrain its regclass, and only // insert COPYs if that's impossible. // constrainOperandRegClass does that for us. - constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI); + constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI); // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been // done. @@ -192,14 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg, bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { - // FIXME: This logical is mostly duplicated with - // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in - // MachineInstr::isLabel? - - // Don't delete frame allocation labels. - if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) - return false; - + // FIXME: This logical is mostly duplicated with + // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in + // MachineInstr::isLabel? + + // Don't delete frame allocation labels. + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) + return false; + // If we can move an instruction, we can remove it. Otherwise, it has // a side-effect of some sort. 
bool SawStore = false; @@ -262,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<APInt> llvm::getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { +Optional<APInt> llvm::getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { Optional<ValueAndVReg> ValAndVReg = getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && @@ -273,17 +273,17 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg, return ValAndVReg->Value; } -Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI) { - Optional<APInt> Val = getConstantVRegVal(VReg, MRI); - if (Val && Val->getBitWidth() <= 64) - return Val->getSExtValue(); - return None; -} - +Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI) { + Optional<APInt> Val = getConstantVRegVal(VReg, MRI); + if (Val && Val->getBitWidth() <= 64) + return Val->getSExtValue(); + return None; +} + Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, - bool HandleFConstant, bool LookThroughAnyExt) { + bool HandleFConstant, bool LookThroughAnyExt) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { @@ -310,10 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && LookThroughInstrs) { switch (MI->getOpcode()) { - case TargetOpcode::G_ANYEXT: - if (!LookThroughAnyExt) - return None; - LLVM_FALLTHROUGH; + case TargetOpcode::G_ANYEXT: + if (!LookThroughAnyExt) + return None; + LLVM_FALLTHROUGH; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: @@ -347,7 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( case TargetOpcode::G_TRUNC: Val = Val.trunc(OpcodeAndSize.second); break; - case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: Val = Val.sext(OpcodeAndSize.second); break; @@ -357,10 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( } } - return ValueAndVReg{Val, VReg}; + return ValueAndVReg{Val, VReg}; } -const ConstantFP * +const ConstantFP * llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) @@ -368,8 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { return MI->getOperand(1).getFPImm(); } -Optional<DefinitionAndSourceRegister> -llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { +Optional<DefinitionAndSourceRegister> +llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); @@ -378,7 +378,7 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { while (DefMI->getOpcode() == TargetOpcode::COPY) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); - if (!SrcTy.isValid()) + if (!SrcTy.isValid()) break; DefMI = MRI.getVRegDef(SrcReg); DefSrcReg = SrcReg; @@ -386,8 +386,8 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { 
return DefinitionAndSourceRegister{DefMI, DefSrcReg}; } -MachineInstr *llvm::getDefIgnoringCopies(Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { Optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); return DefSrcReg ? DefSrcReg->MI : nullptr; @@ -400,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg, return DefSrcReg ? DefSrcReg->Reg : Register(); } -MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, + const MachineRegisterInfo &MRI) { MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI); return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr; } @@ -430,8 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!MaybeOp1Cst) return None; - const APInt &C1 = *MaybeOp1Cst; - const APInt &C2 = *MaybeOp2Cst; + const APInt &C1 = *MaybeOp1Cst; + const APInt &C2 = *MaybeOp2Cst; switch (Opcode) { default: break; @@ -480,8 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - const TargetMachine& TM = DefMI->getMF()->getTarget(); - if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) + const TargetMachine& TM = DefMI->getMF()->getTarget(); + if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; if (SNaN) { @@ -512,40 +512,40 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, return Align(1); } -Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, - const TargetInstrInfo &TII, - MCRegister PhysReg, - const TargetRegisterClass &RC, - LLT RegTy) { - DebugLoc DL; // FIXME: Is no location the right choice? - MachineBasicBlock &EntryMBB = MF.front(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - Register LiveIn = MRI.getLiveInVirtReg(PhysReg); - if (LiveIn) { - MachineInstr *Def = MRI.getVRegDef(LiveIn); - if (Def) { - // FIXME: Should the verifier check this is in the entry block? - assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); - return LiveIn; - } - - // It's possible the incoming argument register and copy was added during - // lowering, but later deleted due to being/becoming dead. If this happens, - // re-insert the copy. - } else { - // The live in register was not present, so add it. - LiveIn = MF.addLiveIn(PhysReg, &RC); - if (RegTy.isValid()) - MRI.setType(LiveIn, RegTy); - } - - BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) - .addReg(PhysReg); - if (!EntryMBB.isLiveIn(PhysReg)) - EntryMBB.addLiveIn(PhysReg); - return LiveIn; -} - +Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, + const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy) { + DebugLoc DL; // FIXME: Is no location the right choice? + MachineBasicBlock &EntryMBB = MF.front(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LiveIn = MRI.getLiveInVirtReg(PhysReg); + if (LiveIn) { + MachineInstr *Def = MRI.getVRegDef(LiveIn); + if (Def) { + // FIXME: Should the verifier check this is in the entry block? + assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); + return LiveIn; + } + + // It's possible the incoming argument register and copy was added during + // lowering, but later deleted due to being/becoming dead. If this happens, + // re-insert the copy. 
+ } else { + // The live in register was not present, so add it. + LiveIn = MF.addLiveIn(PhysReg, &RC); + if (RegTy.isValid()) + MRI.setType(LiveIn, RegTy); + } + + BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) + .addReg(PhysReg); + if (!EntryMBB.isLiveIn(PhysReg)) + EntryMBB.addLiveIn(PhysReg); + return LiveIn; +} + Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { @@ -554,262 +554,262 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, switch (Opcode) { default: break; - case TargetOpcode::G_SEXT_INREG: { - LLT Ty = MRI.getType(Op1); - return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); - } + case TargetOpcode::G_SEXT_INREG: { + LLT Ty = MRI.getType(Op1); + return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); } + } } return None; } -bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, - GISelKnownBits *KB) { - Optional<DefinitionAndSourceRegister> DefSrcReg = - getDefSrcRegIgnoringCopies(Reg, MRI); - if (!DefSrcReg) - return false; - - const MachineInstr &MI = *DefSrcReg->MI; - const LLT Ty = MRI.getType(Reg); - - switch (MI.getOpcode()) { - case TargetOpcode::G_CONSTANT: { - unsigned BitWidth = Ty.getScalarSizeInBits(); - const ConstantInt *CI = MI.getOperand(1).getCImm(); - return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2(); - } - case TargetOpcode::G_SHL: { - // A left-shift of a constant one will have exactly one bit set because - // shifting the bit off the end is undefined. - - // TODO: Constant splat - if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { - if (*ConstLHS == 1) - return true; - } - - break; - } - case TargetOpcode::G_LSHR: { - if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { - if (ConstLHS->isSignMask()) - return true; - } - - break; - } - default: - break; - } - - // TODO: Are all operands of a build vector constant powers of two? - if (!KB) - return false; - - // More could be done here, though the above checks are enough - // to handle some common cases. - - // Fall back to computeKnownBits to catch other known cases. - KnownBits Known = KB->getKnownBits(Reg); - return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); -} - +bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, + GISelKnownBits *KB) { + Optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + if (!DefSrcReg) + return false; + + const MachineInstr &MI = *DefSrcReg->MI; + const LLT Ty = MRI.getType(Reg); + + switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: { + unsigned BitWidth = Ty.getScalarSizeInBits(); + const ConstantInt *CI = MI.getOperand(1).getCImm(); + return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2(); + } + case TargetOpcode::G_SHL: { + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. + + // TODO: Constant splat + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (*ConstLHS == 1) + return true; + } + + break; + } + case TargetOpcode::G_LSHR: { + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (ConstLHS->isSignMask()) + return true; + } + + break; + } + default: + break; + } + + // TODO: Are all operands of a build vector constant powers of two? 
+ if (!KB) + return false; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. + KnownBits Known = KB->getKnownBits(Reg); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } -static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { - unsigned Mul = OrigSize * TargetSize; - unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); - return Mul / GCDSize; -} - -LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) - return OrigTy; - - if (OrigTy.isVector()) { - const LLT OrigElt = OrigTy.getElementType(); - - if (TargetTy.isVector()) { - const LLT TargetElt = TargetTy.getElementType(); - - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - // Prefer the original element type. - int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); - return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); - } - } else { - if (OrigElt.getSizeInBits() == TargetSize) - return OrigTy; - } - - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); +static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { + unsigned Mul = OrigSize * TargetSize; + unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); + return Mul / GCDSize; +} + +LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + const LLT OrigElt = OrigTy.getElementType(); + + if (TargetTy.isVector()) { + const LLT TargetElt = TargetTy.getElementType(); + + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + // Prefer the original element type. + int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); + return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); + } + } else { + if (OrigElt.getSizeInBits() == TargetSize) + return OrigTy; + } + + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); } - if (TargetTy.isVector()) { - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigSize, OrigTy); + if (TargetTy.isVector()) { + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigSize, OrigTy); } - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - - // Preserve pointer types. 
- if (LCMSize == OrigSize) - return OrigTy; - if (LCMSize == TargetSize) - return TargetTy; - - return LLT::scalar(LCMSize); -} - -LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) - return OrigTy; - - if (OrigTy.isVector()) { - LLT OrigElt = OrigTy.getElementType(); - if (TargetTy.isVector()) { - LLT TargetElt = TargetTy.getElementType(); - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigElt); - } - } else { - // If the source is a vector of pointers, return a pointer element. - if (OrigElt.getSizeInBits() == TargetSize) - return OrigElt; - } - - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); - if (GCD == OrigElt.getSizeInBits()) - return OrigElt; - - // If we can't produce the original element type, we have to use a smaller - // scalar. - if (GCD < OrigElt.getSizeInBits()) - return LLT::scalar(GCD); - return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); - } - - if (TargetTy.isVector()) { - // Try to preserve the original element type. - LLT TargetElt = TargetTy.getElementType(); - if (TargetElt.getSizeInBits() == OrigSize) - return OrigTy; - } - - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); - return LLT::scalar(GCD); -} - -Optional<int> llvm::getSplatIndex(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Only G_SHUFFLE_VECTOR can have a splat index!"); - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); - - // If all elements are undefined, this shuffle can be considered a splat. - // Return 0 for better potential for callers to simplify. - if (FirstDefinedIdx == Mask.end()) - return 0; - - // Make sure all remaining elements are either undef or the same - // as the first non-undef value. - int SplatValue = *FirstDefinedIdx; - if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), - [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) - return None; - - return SplatValue; -} - -static bool isBuildVectorOp(unsigned Opcode) { - return Opcode == TargetOpcode::G_BUILD_VECTOR || - Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; -} - -// TODO: Handle mixed undef elements. -static bool isBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - int64_t SplatValue) { - if (!isBuildVectorOp(MI.getOpcode())) - return false; - - const unsigned NumOps = MI.getNumOperands(); - for (unsigned I = 1; I != NumOps; ++I) { - Register Element = MI.getOperand(I).getReg(); - if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) - return false; + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + + // Preserve pointer types. 
+ if (LCMSize == OrigSize) + return OrigTy; + if (LCMSize == TargetSize) + return TargetTy; + + return LLT::scalar(LCMSize); +} + +LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + LLT OrigElt = OrigTy.getElementType(); + if (TargetTy.isVector()) { + LLT TargetElt = TargetTy.getElementType(); + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigElt); + } + } else { + // If the source is a vector of pointers, return a pointer element. + if (OrigElt.getSizeInBits() == TargetSize) + return OrigElt; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + if (GCD == OrigElt.getSizeInBits()) + return OrigElt; + + // If we can't produce the original element type, we have to use a smaller + // scalar. + if (GCD < OrigElt.getSizeInBits()) + return LLT::scalar(GCD); + return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); } - return true; -} - -Optional<int64_t> -llvm::getBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - if (!isBuildVectorOp(MI.getOpcode())) - return None; - - const unsigned NumOps = MI.getNumOperands(); - Optional<int64_t> Scalar; - for (unsigned I = 1; I != NumOps; ++I) { - Register Element = MI.getOperand(I).getReg(); - int64_t ElementValue; - if (!mi_match(Element, MRI, m_ICst(ElementValue))) - return None; - if (!Scalar) - Scalar = ElementValue; - else if (*Scalar != ElementValue) - return None; + if (TargetTy.isVector()) { + // Try to preserve the original element type. + LLT TargetElt = TargetTy.getElementType(); + if (TargetElt.getSizeInBits() == OrigSize) + return OrigTy; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + return LLT::scalar(GCD); +} + +Optional<int> llvm::getSplatIndex(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Only G_SHUFFLE_VECTOR can have a splat index!"); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); + + // If all elements are undefined, this shuffle can be considered a splat. + // Return 0 for better potential for callers to simplify. + if (FirstDefinedIdx == Mask.end()) + return 0; + + // Make sure all remaining elements are either undef or the same + // as the first non-undef value. + int SplatValue = *FirstDefinedIdx; + if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), + [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) + return None; + + return SplatValue; +} + +static bool isBuildVectorOp(unsigned Opcode) { + return Opcode == TargetOpcode::G_BUILD_VECTOR || + Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; +} + +// TODO: Handle mixed undef elements. 
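For context, a minimal standalone sketch (not part of this patch) of what the getLCMType/getGCDType helpers restored above compute. The wrapper function name and the sample types are illustrative only, and the header paths assume LLVM 12's LLT API:

// Illustration only: exercises llvm::getLCMType / llvm::getGCDType as defined
// in the Utils.cpp hunk above, assuming LLVM 12 headers.
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
using namespace llvm;

void lcmGcdExamples() {
  LLT S32 = LLT::scalar(32);
  LLT S48 = LLT::scalar(48);
  LLT S64 = LLT::scalar(64);
  LLT V3S32 = LLT::vector(3, 32); // 96 bits total
  LLT V4S32 = LLT::vector(4, 32); // 128 bits total

  // LCM of 96 and 64 bits is 192, expressed in the original element type.
  assert(getLCMType(V3S32, S64) == LLT::vector(6, 32));
  // GCD of 128 and 64 bits is 64, still expressible as a vector of s32.
  assert(getGCDType(V4S32, S64) == LLT::vector(2, 32));
  // Two plain scalars fall back to a scalar of the GCD width.
  assert(getGCDType(S32, S48) == LLT::scalar(16));
}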
+static bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue) { + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) + return false; } - return Scalar; -} - -bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, 0); -} - -bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, -1); -} - -bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, - bool IsFP) { - switch (TLI.getBooleanContents(IsVector, IsFP)) { - case TargetLowering::UndefinedBooleanContent: - return Val & 0x1; - case TargetLowering::ZeroOrOneBooleanContent: - return Val == 1; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - return Val == -1; + return true; +} + +Optional<int64_t> +llvm::getBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + if (!isBuildVectorOp(MI.getOpcode())) + return None; + + const unsigned NumOps = MI.getNumOperands(); + Optional<int64_t> Scalar; + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + int64_t ElementValue; + if (!mi_match(Element, MRI, m_ICst(ElementValue))) + return None; + if (!Scalar) + Scalar = ElementValue; + else if (*Scalar != ElementValue) + return None; } - llvm_unreachable("Invalid boolean contents"); -} -int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, - bool IsFP) { - switch (TLI.getBooleanContents(IsVector, IsFP)) { - case TargetLowering::UndefinedBooleanContent: - case TargetLowering::ZeroOrOneBooleanContent: - return 1; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - return -1; - } - llvm_unreachable("Invalid boolean contents"); -} + return Scalar; +} + +bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, 0); +} + +bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, -1); +} + +bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + return Val & 0x1; + case TargetLowering::ZeroOrOneBooleanContent: + return Val == 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return Val == -1; + } + llvm_unreachable("Invalid boolean contents"); +} + +int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrOneBooleanContent: + return 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return -1; + } + llvm_unreachable("Invalid boolean contents"); +} diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make index e6de0fe8d9..6ede6da277 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make @@ -12,16 +12,16 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - 
contrib/libs/llvm12/include
-    contrib/libs/llvm12/lib/Analysis
-    contrib/libs/llvm12/lib/CodeGen
-    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
-    contrib/libs/llvm12/lib/IR
-    contrib/libs/llvm12/lib/MC
-    contrib/libs/llvm12/lib/Support
-    contrib/libs/llvm12/lib/Target
-    contrib/libs/llvm12/lib/Transforms/Utils
+    contrib/libs/llvm12
+    contrib/libs/llvm12/include
+    contrib/libs/llvm12/lib/Analysis
+    contrib/libs/llvm12/lib/CodeGen
+    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+    contrib/libs/llvm12/lib/IR
+    contrib/libs/llvm12/lib/MC
+    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12/lib/Target
+    contrib/libs/llvm12/lib/Transforms/Utils
 )

 ADDINCL(
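For reference, a minimal self-contained sketch (not part of this patch) of the boolean-contents convention that isConstTrueVal and getICmpTrueVal in the Utils.cpp hunk above encode. The enum and function names below are invented stand-ins so the snippet compiles and runs without a TargetLowering instance:

// Illustration only: mirrors the switch in llvm::isConstTrueVal above, with
// TargetLowering::BooleanContent replaced by a local enum for a standalone run.
#include <cassert>
#include <cstdint>

enum class BoolContent { UndefinedUpperBits, ZeroOrOne, ZeroOrNegativeOne };

static bool isConstTrueValSketch(BoolContent BC, int64_t Val) {
  switch (BC) {
  case BoolContent::UndefinedUpperBits: return Val & 0x1; // only bit 0 matters
  case BoolContent::ZeroOrOne:          return Val == 1;
  case BoolContent::ZeroOrNegativeOne:  return Val == -1; // all-ones "true"
  }
  return false;
}

int main() {
  // A target using the zero-or-one convention treats only 1 as "true"...
  assert(isConstTrueValSketch(BoolContent::ZeroOrOne, 1));
  assert(!isConstTrueValSketch(BoolContent::ZeroOrOne, -1));
  // ...while an all-ones-boolean target expects -1.
  assert(isConstTrueValSketch(BoolContent::ZeroOrNegativeOne, -1));
  return 0;
}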