about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/llvm12/lib/CodeGen/GlobalISel
diff options
context:
space:
mode:
author: shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:30 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:30 +0300
commit: 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree: 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/CodeGen/GlobalISel
parent: 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download: ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/CodeGen/GlobalISel')
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp | 36
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 64
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp | 886
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp | 6
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3832
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp | 2
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 356
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp | 1642
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 12
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 2
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 34
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 20
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp | 2
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2534
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 8
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp | 46
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 216
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 2
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp | 656
-rw-r--r-- contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make | 20
21 files changed, 5189 insertions, 5189 deletions
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 2fa208fbfa..24391970d6 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -59,7 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PTR_ADD:
- case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_EXTRACT:
return true;
}
return false;
@@ -367,21 +367,21 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
return *this;
}
-const GISelInstProfileBuilder &
-GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
- LLT Ty = MRI.getType(Reg);
- if (Ty.isValid())
- addNodeIDRegType(Ty);
-
- if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
- addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
- addNodeIDRegType(RC);
- }
- return *this;
-}
-
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+
+ if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
+ if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ addNodeIDRegType(RB);
+ else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ addNodeIDRegType(RC);
+ }
+ return *this;
+}
+
const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
const MachineOperand &MO) const {
if (MO.isReg()) {
@@ -389,8 +389,8 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
if (!MO.isDef())
addNodeIDRegNum(Reg);
- // Profile the register properties.
- addNodeIDReg(Reg);
+ // Profile the register properties.
+ addNodeIDReg(Reg);
assert(!MO.isImplicit() && "Unhandled case");
} else if (MO.isImm())
ID.AddInteger(MO.getImm());
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 2c86f06a60..b0f8a6610d 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,7 +13,7 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -42,14 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
if (MI) {
CSEInfo->countOpcodeHit(MI->getOpcode());
auto CurrPos = getInsertPt();
- auto MII = MachineBasicBlock::iterator(MI);
- if (MII == CurrPos) {
- // Move the insert point ahead of the instruction so any future uses of
- // this builder will have the def ready.
- setInsertPt(*CurMBB, std::next(MII));
- } else if (!dominates(MI, CurrPos)) {
+ auto MII = MachineBasicBlock::iterator(MI);
+ if (MII == CurrPos) {
+ // Move the insert point ahead of the instruction so any future uses of
+ // this builder will have the def ready.
+ setInsertPt(*CurMBB, std::next(MII));
+ } else if (!dominates(MI, CurrPos)) {
CurMBB->splice(CurrPos, CurMBB, MI);
- }
+ }
return MachineInstrBuilder(getMF(), MI);
}
return MachineInstrBuilder();
@@ -68,11 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
case DstOp::DstType::Ty_RC:
B.addNodeIDRegType(Op.getRegClass());
break;
- case DstOp::DstType::Ty_Reg: {
- // Regs can have LLT&(RB|RC). If those exist, profile them as well.
- B.addNodeIDReg(Op.getReg());
- break;
- }
+ case DstOp::DstType::Ty_Reg: {
+ // Regs can have LLT&(RB|RC). If those exist, profile them as well.
+ B.addNodeIDReg(Op.getReg());
+ break;
+ }
default:
B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
break;
@@ -82,9 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
GISelInstProfileBuilder &B) const {
switch (Op.getSrcOpKind()) {
- case SrcOp::SrcType::Ty_Imm:
- B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
- break;
+ case SrcOp::SrcType::Ty_Imm:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ break;
case SrcOp::SrcType::Ty_Predicate:
B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
break;
@@ -130,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
if (DstOps.size() == 1)
return true; // always possible to emit copy to just 1 vreg.
- return llvm::all_of(DstOps, [](const DstOp &Op) {
+ return llvm::all_of(DstOps, [](const DstOp &Op) {
DstOp::DstType DT = Op.getDstOpKind();
return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
});
@@ -146,21 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
return buildCopy(Op.getReg(), MIB.getReg(0));
}
-
- // If we didn't generate a copy then we're re-using an existing node directly
- // instead of emitting any code. Merge the debug location we wanted to emit
- // into the instruction we're CSE'ing with. Debug locations arent part of the
- // profile so we don't need to recompute it.
- if (getDebugLoc()) {
- GISelChangeObserver *Observer = getState().Observer;
- if (Observer)
- Observer->changingInstr(*MIB);
- MIB->setDebugLoc(
- DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc()));
- if (Observer)
- Observer->changedInstr(*MIB);
- }
-
+
+ // If we didn't generate a copy then we're re-using an existing node directly
+ // instead of emitting any code. Merge the debug location we wanted to emit
+ // into the instruction we're CSE'ing with. Debug locations arent part of the
+ // profile so we don't need to recompute it.
+ if (getDebugLoc()) {
+ GISelChangeObserver *Observer = getState().Observer;
+ if (Observer)
+ Observer->changingInstr(*MIB);
+ MIB->setDebugLoc(
+ DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc()));
+ if (Observer)
+ Observer->changedInstr(*MIB);
+ }
+
return MIB;
}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp
index 803e1527a4..ad7c789b2e 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -30,51 +30,51 @@ using namespace llvm;
void CallLowering::anchor() {}
-/// Helper function which updates \p Flags when \p AttrFn returns true.
-static void
-addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
- const std::function<bool(Attribute::AttrKind)> &AttrFn) {
- if (AttrFn(Attribute::SExt))
- Flags.setSExt();
- if (AttrFn(Attribute::ZExt))
- Flags.setZExt();
- if (AttrFn(Attribute::InReg))
- Flags.setInReg();
- if (AttrFn(Attribute::StructRet))
- Flags.setSRet();
- if (AttrFn(Attribute::Nest))
- Flags.setNest();
- if (AttrFn(Attribute::ByVal))
- Flags.setByVal();
- if (AttrFn(Attribute::Preallocated))
- Flags.setPreallocated();
- if (AttrFn(Attribute::InAlloca))
- Flags.setInAlloca();
- if (AttrFn(Attribute::Returned))
- Flags.setReturned();
- if (AttrFn(Attribute::SwiftSelf))
- Flags.setSwiftSelf();
- if (AttrFn(Attribute::SwiftError))
- Flags.setSwiftError();
-}
-
-ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
- unsigned ArgIdx) const {
- ISD::ArgFlagsTy Flags;
- addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
- return Call.paramHasAttr(ArgIdx, Attr);
- });
- return Flags;
-}
-
-void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
- const AttributeList &Attrs,
- unsigned OpIdx) const {
- addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
- return Attrs.hasAttribute(OpIdx, Attr);
- });
-}
-
+/// Helper function which updates \p Flags when \p AttrFn returns true.
+static void
+addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
+ const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+ if (AttrFn(Attribute::SExt))
+ Flags.setSExt();
+ if (AttrFn(Attribute::ZExt))
+ Flags.setZExt();
+ if (AttrFn(Attribute::InReg))
+ Flags.setInReg();
+ if (AttrFn(Attribute::StructRet))
+ Flags.setSRet();
+ if (AttrFn(Attribute::Nest))
+ Flags.setNest();
+ if (AttrFn(Attribute::ByVal))
+ Flags.setByVal();
+ if (AttrFn(Attribute::Preallocated))
+ Flags.setPreallocated();
+ if (AttrFn(Attribute::InAlloca))
+ Flags.setInAlloca();
+ if (AttrFn(Attribute::Returned))
+ Flags.setReturned();
+ if (AttrFn(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (AttrFn(Attribute::SwiftError))
+ Flags.setSwiftError();
+}
+
+ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
+ return Call.paramHasAttr(ArgIdx, Attr);
+ });
+ return Flags;
+}
+
+void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const {
+ addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
+ return Attrs.hasAttribute(OpIdx, Attr);
+ });
+}
+
bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
@@ -82,45 +82,45 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
- MachineFunction &MF = MIRBuilder.getMF();
- bool CanBeTailCalled = CB.isTailCall() &&
- isInTailCallPosition(CB, MF.getTarget()) &&
- (MF.getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsString() != "true");
-
- CallingConv::ID CallConv = CB.getCallingConv();
- Type *RetTy = CB.getType();
- bool IsVarArg = CB.getFunctionType()->isVarArg();
-
- SmallVector<BaseArgInfo, 4> SplitArgs;
- getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
- Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
-
- if (!Info.CanLowerReturn) {
- // Callee requires sret demotion.
- insertSRetOutgoingArgument(MIRBuilder, CB, Info);
-
- // The sret demotion isn't compatible with tail-calls, since the sret
- // argument points into the caller's stack frame.
- CanBeTailCalled = false;
- }
-
+ MachineFunction &MF = MIRBuilder.getMF();
+ bool CanBeTailCalled = CB.isTailCall() &&
+ isInTailCallPosition(CB, MF.getTarget()) &&
+ (MF.getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
+
+ CallingConv::ID CallConv = CB.getCallingConv();
+ Type *RetTy = CB.getType();
+ bool IsVarArg = CB.getFunctionType()->isVarArg();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
+ Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+
+ if (!Info.CanLowerReturn) {
+ // Callee requires sret demotion.
+ insertSRetOutgoingArgument(MIRBuilder, CB, Info);
+
+ // The sret demotion isn't compatible with tail-calls, since the sret
+ // argument points into the caller's stack frame.
+ CanBeTailCalled = false;
+ }
+
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
-
- // If we have an explicit sret argument that is an Instruction, (i.e., it
- // might point to function-local memory), we can't meaningfully tail-call.
- if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
- CanBeTailCalled = false;
-
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
+ CanBeTailCalled = false;
+
Info.OrigArgs.push_back(OrigArg);
++i;
}
@@ -133,16 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}};
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
- Info.CallConv = CallConv;
+ Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CB.isMustTailCall();
- Info.IsTailCall = CanBeTailCalled;
- Info.IsVarArg = IsVarArg;
+ Info.IsTailCall = CanBeTailCalled;
+ Info.IsVarArg = IsVarArg;
return lowerCall(MIRBuilder, Info);
}
@@ -152,7 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const FuncInfoTy &FuncInfo) const {
auto &Flags = Arg.Flags[0];
const AttributeList &Attrs = FuncInfo.getAttributes();
- addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
+ addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
@@ -245,97 +245,97 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
EVT CurVT = EVT::getEVT(Args[i].Ty);
- if (CurVT.isSimple() &&
- !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
- CCValAssign::Full, Args[i], Args[i].Flags[0],
- CCInfo))
- continue;
-
- MVT NewVT = TLI->getRegisterTypeForCallingConv(
- F.getContext(), F.getCallingConv(), EVT(CurVT));
-
- // If we need to split the type over multiple regs, check it's a scenario
- // we currently support.
- unsigned NumParts = TLI->getNumRegistersForCallingConv(
- F.getContext(), F.getCallingConv(), CurVT);
-
- if (NumParts == 1) {
- // Try to use the register type if we couldn't assign the VT.
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[0], CCInfo))
- return false;
- continue;
- }
-
- assert(NumParts > 1);
- // For now only handle exact splits.
- if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
- return false;
-
- // For incoming arguments (physregs to vregs), we could have values in
- // physregs (or memlocs) which we want to extract and copy to vregs.
- // During this, we might have to deal with the LLT being split across
- // multiple regs, so we have to record this information for later.
- //
- // If we have outgoing args, then we have the opposite case. We have a
- // vreg with an LLT which we want to assign to a physical location, and
- // we might have to record that the value has to be split later.
- if (Handler.isIncomingArgumentHandler()) {
- // We're handling an incoming arg which is split over multiple regs.
- // E.g. passing an s128 on AArch64.
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- Args[i].OrigRegs.push_back(Args[i].Regs[0]);
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- LLT NewLLT = getLLTForMVT(NewVT);
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- Register Reg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (Part == 0) {
- Flags.setSplit();
+ if (CurVT.isSimple() &&
+ !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
+ CCValAssign::Full, Args[i], Args[i].Flags[0],
+ CCInfo))
+ continue;
+
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), EVT(CurVT));
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), CurVT);
+
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ continue;
+ }
+
+ assert(NumParts > 1);
+ // For now only handle exact splits.
+ if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
+ return false;
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+ if (Handler.isIncomingArgumentHandler()) {
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ LLT NewLLT = getLLTForMVT(NewVT);
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register Reg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
} else {
- Flags.setOrigAlign(Align(1));
- if (Part == NumParts - 1)
- Flags.setSplitEnd();
+ Flags.setOrigAlign(Align(1));
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
}
- Args[i].Regs.push_back(Reg);
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[Part], CCInfo)) {
- // Still couldn't assign this smaller part type for some reason.
- return false;
- }
- }
- } else {
- // This type is passed via multiple registers in the calling convention.
- // We need to extract the individual parts.
- Register LargeReg = Args[i].Regs[0];
- LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
- auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
- assert(Unmerge->getNumOperands() == NumParts + 1);
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- // We're going to replace the regs and flags with the split ones.
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (PartIdx == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (PartIdx == NumParts - 1)
- Flags.setSplitEnd();
+ Args[i].Regs.push_back(Reg);
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
}
- Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[PartIdx], CCInfo))
- return false;
}
+ } else {
+ // This type is passed via multiple registers in the calling convention.
+ // We need to extract the individual parts.
+ Register LargeReg = Args[i].Regs[0];
+ LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
+ auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
+ assert(Unmerge->getNumOperands() == NumParts + 1);
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ // We're going to replace the regs and flags with the split ones.
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (PartIdx == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align(1));
+ if (PartIdx == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[PartIdx], CCInfo))
+ return false;
+ }
}
}
@@ -361,239 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
EVT VAVT = VA.getValVT();
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- // Expected to be multiple regs for a single incoming arg.
- // There should be Regs.size() ArgLocs per argument.
- unsigned NumArgRegs = Args[i].Regs.size();
-
- assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
- "Too many regs for number of args");
- for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
- // There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
- if (VA.isMemLoc()) {
- // Don't currently support loading/storing a type that needs to be split
- // to the stack. Should be easy, just not implemented yet.
- if (NumArgRegs > 1) {
- LLVM_DEBUG(
- dbgs()
- << "Load/store a split arg to/from the stack not implemented yet\n");
- return false;
+ // Expected to be multiple regs for a single incoming arg.
+ // There should be Regs.size() ArgLocs per argument.
+ unsigned NumArgRegs = Args[i].Regs.size();
+
+ assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ if (VA.isMemLoc()) {
+ // Don't currently support loading/storing a type that needs to be split
+ // to the stack. Should be easy, just not implemented yet.
+ if (NumArgRegs > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Load/store a split arg to/from the stack not implemented yet\n");
+ return false;
}
-
- // FIXME: Use correct address space for pointer size
- EVT LocVT = VA.getValVT();
- unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
- : LocVT.getStoreSize();
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
- Handler.assignValueToAddress(Args[i], StackAddr,
- MemSize, MPO, VA);
- continue;
- }
-
- assert(VA.isRegLoc() && "custom loc should have been handled already");
-
- // GlobalISel does not currently work for scalable vectors.
- if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
- !Handler.isIncomingArgumentHandler()) {
- // This is an argument that might have been split. There should be
- // Regs.size() ArgLocs per argument.
-
- // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
- // to the original register after handling all of the parts.
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
- continue;
- }
-
- // This ArgLoc covers multiple pieces, so we need to split it.
- const LLT VATy(VAVT.getSimpleVT());
- Register NewReg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
- Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
- // If it's a vector type, we either need to truncate the elements
- // or do an unmerge to get the lower block of elements.
- if (VATy.isVector() &&
- VATy.getNumElements() > OrigVT.getVectorNumElements()) {
- // Just handle the case where the VA type is 2 * original type.
- if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
- LLVM_DEBUG(dbgs()
- << "Incoming promoted vector arg has too many elts");
- return false;
+
+ // FIXME: Use correct address space for pointer size
+ EVT LocVT = VA.getValVT();
+ unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
+ : LocVT.getStoreSize();
+ unsigned Offset = VA.getLocMemOffset();
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
+ Handler.assignValueToAddress(Args[i], StackAddr,
+ MemSize, MPO, VA);
+ continue;
+ }
+
+ assert(VA.isRegLoc() && "custom loc should have been handled already");
+
+ // GlobalISel does not currently work for scalable vectors.
+ if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
+ !Handler.isIncomingArgumentHandler()) {
+ // This is an argument that might have been split. There should be
+ // Regs.size() ArgLocs per argument.
+
+ // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
+ // to the original register after handling all of the parts.
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ continue;
+ }
+
+ // This ArgLoc covers multiple pieces, so we need to split it.
+ const LLT VATy(VAVT.getSimpleVT());
+ Register NewReg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+ Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+ // If it's a vector type, we either need to truncate the elements
+ // or do an unmerge to get the lower block of elements.
+ if (VATy.isVector() &&
+ VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+ // Just handle the case where the VA type is 2 * original type.
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+ LLVM_DEBUG(dbgs()
+ << "Incoming promoted vector arg has too many elts");
+ return false;
}
- auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
- MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
+ auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+ MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
} else {
- MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
}
- }
-
- // Now that all pieces have been handled, re-pack any arguments into any
- // wider, original registers.
- if (Handler.isIncomingArgumentHandler()) {
- if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
- assert(NumArgRegs >= 2);
-
- // Merge the split registers into the expected larger result vreg
- // of the original call.
- MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
+ }
+
+ // Now that all pieces have been handled, re-pack any arguments into any
+ // wider, original registers.
+ if (Handler.isIncomingArgumentHandler()) {
+ if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
+ assert(NumArgRegs >= 2);
+
+ // Merge the split registers into the expected larger result vreg
+ // of the original call.
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
}
- }
-
- j += NumArgRegs - 1;
- }
-
- return true;
-}
-
-void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
- ArrayRef<Register> VRegs, Register DemoteReg,
- int FI) const {
- MachineFunction &MF = MIRBuilder.getMF();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const DataLayout &DL = MF.getDataLayout();
-
- SmallVector<EVT, 4> SplitVTs;
- SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
-
- assert(VRegs.size() == SplitVTs.size());
-
- unsigned NumValues = SplitVTs.size();
- Align BaseAlign = DL.getPrefTypeAlign(RetTy);
- Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
- LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
-
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
-
- for (unsigned I = 0; I < NumValues; ++I) {
- Register Addr;
- MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
- auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MRI.getType(VRegs[I]).getSizeInBytes(),
- commonAlignment(BaseAlign, Offsets[I]));
- MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
- }
-}
-
-void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
- ArrayRef<Register> VRegs,
- Register DemoteReg) const {
- MachineFunction &MF = MIRBuilder.getMF();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const DataLayout &DL = MF.getDataLayout();
-
- SmallVector<EVT, 4> SplitVTs;
- SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
-
- assert(VRegs.size() == SplitVTs.size());
-
- unsigned NumValues = SplitVTs.size();
- Align BaseAlign = DL.getPrefTypeAlign(RetTy);
- unsigned AS = DL.getAllocaAddrSpace();
- LLT OffsetLLTy =
- getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
-
- MachinePointerInfo PtrInfo(AS);
-
- for (unsigned I = 0; I < NumValues; ++I) {
- Register Addr;
- MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
- auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- MRI.getType(VRegs[I]).getSizeInBytes(),
- commonAlignment(BaseAlign, Offsets[I]));
- MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
- }
-}
-
-void CallLowering::insertSRetIncomingArgument(
- const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
- MachineRegisterInfo &MRI, const DataLayout &DL) const {
- unsigned AS = DL.getAllocaAddrSpace();
- DemoteReg = MRI.createGenericVirtualRegister(
- LLT::pointer(AS, DL.getPointerSizeInBits(AS)));
-
- Type *PtrTy = PointerType::get(F.getReturnType(), AS);
-
- SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);
-
- // NOTE: Assume that a pointer won't get split into more than one VT.
- assert(ValueVTs.size() == 1);
-
- ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()));
- setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
- DemoteArg.Flags[0].setSRet();
- SplitArgs.insert(SplitArgs.begin(), DemoteArg);
-}
-
-void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
- const CallBase &CB,
- CallLoweringInfo &Info) const {
- const DataLayout &DL = MIRBuilder.getDataLayout();
- Type *RetTy = CB.getType();
- unsigned AS = DL.getAllocaAddrSpace();
- LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
-
- int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
- DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
-
- Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
- ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS));
- setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
- DemoteArg.Flags[0].setSRet();
-
- Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
- Info.DemoteStackIndex = FI;
- Info.DemoteRegister = DemoteReg;
-}
-
-bool CallLowering::checkReturn(CCState &CCInfo,
- SmallVectorImpl<BaseArgInfo> &Outs,
- CCAssignFn *Fn) const {
- for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
- MVT VT = MVT::getVT(Outs[I].Ty);
- if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
+ }
+
+ j += NumArgRegs - 1;
+ }
+
+ return true;
+}
+
+void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const { // Emits one load per split piece of RetTy into VRegs.
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+ // Ask for the value types RetTy splits into, plus each piece's offset.
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ // The caller must pass exactly one destination vreg per split value.
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+ // All loads are described as accesses to the fixed stack slot FI.
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr; // Set by materializePtrAdd to DemoteReg + Offsets[I].
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I])); // Alignment known at this offset.
+ MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs,
+ Register DemoteReg) const { // Emits one store per split piece of RetTy.
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+ // Ask for the value types RetTy splits into, plus each piece's offset.
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ // The caller must pass exactly one source vreg per split value.
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT OffsetLLTy =
+ getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+ // No frame index is available here, so only the address space is recorded.
+ MachinePointerInfo PtrInfo(AS);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr; // Set by materializePtrAdd to DemoteReg + Offsets[I].
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I])); // Alignment known at this offset.
+ MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetIncomingArgument(
+ const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
+ MachineRegisterInfo &MRI, const DataLayout &DL) const {
+ unsigned AS = DL.getAllocaAddrSpace();
+ DemoteReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AS, DL.getPointerSizeInBits(AS))); // Out-param: fresh pointer vreg.
+ // The hidden argument is typed as a pointer to F's return type.
+ Type *PtrTy = PointerType::get(F.getReturnType(), AS);
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);
+
+ // NOTE: Assume that a pointer won't get split into more than one VT.
+ assert(ValueVTs.size() == 1);
+ // Build the argument, flag it sret, and make it the first entry of SplitArgs.
+ ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
+ DemoteArg.Flags[0].setSRet();
+ SplitArgs.insert(SplitArgs.begin(), DemoteArg);
+}
+
+void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ Type *RetTy = CB.getType();
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ // Create a stack object with RetTy's alloc size and preferred alignment.
+ int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
+ DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
+ // Take the slot's address and wrap it as an sret-flagged pointer argument.
+ Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
+ ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
+ DemoteArg.Flags[0].setSRet();
+ // Prepend the argument and record the slot and register in Info.
+ Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
+ Info.DemoteStackIndex = FI;
+ Info.DemoteRegister = DemoteReg;
+}
+
+bool CallLowering::checkReturn(CCState &CCInfo,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const { // True iff Fn accepts every entry of Outs.
+ for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
+ MVT VT = MVT::getVT(Outs[I].Ty); // Same VT is passed as value and location type.
+ if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
return false; // A true result from Fn is treated as failure.
}
return true;
}
-void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
- AttributeList Attrs,
- SmallVectorImpl<BaseArgInfo> &Outs,
- const DataLayout &DL) const {
- LLVMContext &Context = RetTy->getContext();
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-
- SmallVector<EVT, 4> SplitVTs;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
- addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);
-
- for (EVT VT : SplitVTs) {
- unsigned NumParts =
- TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
- MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
- Type *PartTy = EVT(RegVT).getTypeForEVT(Context);
-
- for (unsigned I = 0; I < NumParts; ++I) {
- Outs.emplace_back(PartTy, Flags);
- }
- }
-}
-
-bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
- const auto &F = MF.getFunction();
- Type *ReturnType = F.getReturnType();
- CallingConv::ID CallConv = F.getCallingConv();
-
- SmallVector<BaseArgInfo, 4> SplitArgs;
- getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
- MF.getDataLayout());
- return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
-}
-
+void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
+ AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const { // Appends one BaseArgInfo per register part.
+ LLVMContext &Context = RetTy->getContext();
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ // Split RetTy into value types; the same return-index flags go on every part.
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
+ addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);
+
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
+ MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Context);
+ // Emit NumParts identical entries of the register type for this value.
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Outs.emplace_back(PartTy, Flags);
+ }
+ }
+}
+
+bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ Type *ReturnType = F.getReturnType();
+ CallingConv::ID CallConv = F.getCallingConv();
+ // Split the return type into parts and forward them to canLowerReturn.
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
+ MF.getDataLayout());
+ return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
+}
+
bool CallLowering::analyzeArgInfo(CCState &CCState,
SmallVectorImpl<ArgInfo> &Args,
CCAssignFn &AssignFnFixed,
@@ -611,58 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState,
return true;
}
-bool CallLowering::parametersInCSRMatch(
- const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
- const SmallVectorImpl<CCValAssign> &OutLocs,
- const SmallVectorImpl<ArgInfo> &OutArgs) const {
- for (unsigned i = 0; i < OutLocs.size(); ++i) {
- auto &ArgLoc = OutLocs[i];
- // If it's not a register, it's fine.
- if (!ArgLoc.isRegLoc())
- continue;
-
- MCRegister PhysReg = ArgLoc.getLocReg();
-
- // Only look at callee-saved registers.
- if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
- continue;
-
- LLVM_DEBUG(
- dbgs()
- << "... Call has an argument passed in a callee-saved register.\n");
-
- // Check if it was copied from.
- const ArgInfo &OutInfo = OutArgs[i];
-
- if (OutInfo.Regs.size() > 1) {
- LLVM_DEBUG(
- dbgs() << "... Cannot handle arguments in multiple registers.\n");
- return false;
- }
-
- // Check if we copy the register, walking through copies from virtual
- // registers. Note that getDefIgnoringCopies does not ignore copies from
- // physical registers.
- MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
- if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
- LLVM_DEBUG(
- dbgs()
- << "... Parameter was not copied into a VReg, cannot tail call.\n");
- return false;
- }
-
- // Got a copy. Verify that it's the same as the register we want.
- Register CopyRHS = RegDef->getOperand(1).getReg();
- if (CopyRHS != PhysReg) {
- LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
- "VReg, cannot tail call.\n");
- return false;
- }
- }
-
- return true;
-}
-
+bool CallLowering::parametersInCSRMatch(
+ const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &OutLocs,
+ const SmallVectorImpl<ArgInfo> &OutArgs) const {
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ // If it's not a register, it's fine.
+ if (!ArgLoc.isRegLoc())
+ continue;
+
+ MCRegister PhysReg = ArgLoc.getLocReg();
+
+ // Only look at callee-saved registers, i.e. skip any register that
+ // CallerPreservedMask reports as clobbered.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "... Call has an argument passed in a callee-saved register.\n");
+
+ // Check whether the outgoing value is a copy of that same register.
+ const ArgInfo &OutInfo = OutArgs[i]; // NOTE(review): assumes OutArgs parallels OutLocs.
+
+ if (OutInfo.Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs() << "... Cannot handle arguments in multiple registers.\n");
+ return false;
+ }
+
+ // Check if we copy the register, walking through copies from virtual
+ // registers. Note that getDefIgnoringCopies does not ignore copies from
+ // physical registers.
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
+ if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Parameter was not copied into a VReg, cannot tail call.\n");
+ return false;
+ }
+
+ // Got a copy. Verify that its source is the register we want.
+ Register CopyRHS = RegDef->getOperand(1).getReg();
+ if (CopyRHS != PhysReg) {
+ LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
+ "VReg, cannot tail call.\n");
+ return false;
+ }
+ }
+ // Every argument in a callee-saved register is a copy of that register.
+ return true;
+}
+
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp
index f1071d96e5..86480b47e9 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -153,8 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
MFChanged |= Changed;
} while (Changed);
- assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
- "CSEInfo is not consistent. Likely missing calls to "
- "observer on mutations"));
+ assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
+ "CSEInfo is not consistent. Likely missing calls to "
+ "observer on mutations"));
return MFChanged;
}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a9353bdfb7..8ea55b6abd 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -44,75 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
(void)this->KB;
}
-const TargetLowering &CombinerHelper::getTargetLowering() const {
- return *Builder.getMF().getSubtarget().getTargetLowering();
-}
-
-/// \returns The little endian in-memory byte position of byte \p I in a
-/// \p ByteWidth bytes wide type.
-///
-/// E.g. Given a 4-byte type x, x[0] -> byte 0
-static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
- assert(I < ByteWidth && "I must be in [0, ByteWidth)");
- return I;
-}
-
-/// \returns The big endian in-memory byte position of byte \p I in a
-/// \p ByteWidth bytes wide type.
-///
-/// E.g. Given a 4-byte type x, x[0] -> byte 3
-static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
- assert(I < ByteWidth && "I must be in [0, ByteWidth)");
- return ByteWidth - I - 1;
-}
-
-/// Given a map from byte offsets in memory to indices in a load/store,
-/// determine if that map corresponds to a little or big endian byte pattern.
-///
-/// \param MemOffset2Idx maps memory offsets to address offsets.
-/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
-///
-/// \returns true if the map corresponds to a big endian byte pattern, false
-/// if it corresponds to a little endian byte pattern, and None otherwise.
-///
-/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
-/// are as follows:
-///
-/// AddrOffset Little endian Big endian
-/// 0 0 3
-/// 1 1 2
-/// 2 2 1
-/// 3 3 0
-static Optional<bool>
-isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
- int64_t LowestIdx) {
- // Need at least two byte positions to decide on endianness.
- unsigned Width = MemOffset2Idx.size();
- if (Width < 2)
- return None;
- bool BigEndian = true, LittleEndian = true;
- for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
- auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
- if (MemOffsetAndIdx == MemOffset2Idx.end())
- return None;
- const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
- assert(Idx >= 0 && "Expected non-negative byte offset?");
- LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
- BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
- if (!BigEndian && !LittleEndian)
- return None;
- }
-
- assert((BigEndian != LittleEndian) &&
- "Pattern cannot be both big and little endian!");
- return BigEndian;
-}
-
-bool CombinerHelper::isLegalOrBeforeLegalizer(
- const LegalityQuery &Query) const {
- return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
-}
-
+const TargetLowering &CombinerHelper::getTargetLowering() const {
+ return *Builder.getMF().getSubtarget().getTargetLowering(); // Via Builder's MF.
+}
+
+/// \returns The little endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 0 and x[3] -> byte 3.
+static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return I; // Identity mapping; ByteWidth is only used by the assert above.
+}
+
+/// \returns The big endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 3 and x[3] -> byte 0.
+static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return ByteWidth - I - 1; // Mirror I within [0, ByteWidth).
+}
+
+/// Given a map from byte offsets in memory to indices in a load/store,
+/// determine if that map corresponds to a little or big endian byte pattern.
+///
+/// \param MemOffset2Idx maps memory offsets to address offsets.
+/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
+///
+/// \returns true if the map corresponds to a big endian byte pattern, false
+/// if it corresponds to a little endian byte pattern, and None otherwise.
+///
+/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
+/// are as follows:
+///
+/// AddrOffset Little endian Big endian
+/// 0 0 3
+/// 1 1 2
+/// 2 2 1
+/// 3 3 0
+static Optional<bool>
+isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ int64_t LowestIdx) {
+ // Need at least two byte positions to decide on endianness.
+ unsigned Width = MemOffset2Idx.size();
+ if (Width < 2)
+ return None;
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
+ auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
+ if (MemOffsetAndIdx == MemOffset2Idx.end())
+ return None; // Every offset in [0, Width) must be present in the map.
+ const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; // Rebase to zero.
+ assert(Idx >= 0 && "Expected non-negative byte offset?");
+ LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
+ BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
+ if (!BigEndian && !LittleEndian)
+ return None; // Neither pattern can match any more.
+ }
+ // With Width >= 2, offset 0 alone rules out one of the two patterns.
+ assert((BigEndian != LittleEndian) &&
+ "Pattern cannot be both big and little endian!");
+ return BigEndian;
+}
+
+bool CombinerHelper::isLegalOrBeforeLegalizer(
+ const LegalityQuery &Query) const { // True if LI is null or Query is Legal.
+ return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
@@ -624,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
- const MachineBasicBlock &MBB = *DefMI.getParent();
- auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
- return &MI == &DefMI || &MI == &UseMI;
- });
- if (DefOrUse == MBB.end())
- llvm_unreachable("Block must contain both DefMI and UseMI!");
- return &*DefOrUse == &DefMI;
+ const MachineBasicBlock &MBB = *DefMI.getParent();
+ auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
+ return &MI == &DefMI || &MI == &UseMI;
+ });
+ if (DefOrUse == MBB.end())
+ llvm_unreachable("Block must contain both DefMI and UseMI!");
+ return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
@@ -645,101 +645,101 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI,
return isPredecessor(DefMI, UseMI);
}
-bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- Register SrcReg = MI.getOperand(1).getReg();
- Register LoadUser = SrcReg;
-
- if (MRI.getType(SrcReg).isVector())
- return false;
-
- Register TruncSrc;
- if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
- LoadUser = TruncSrc;
-
- uint64_t SizeInBits = MI.getOperand(2).getImm();
- // If the source is a G_SEXTLOAD from the same bit width, then we don't
- // need any extend at all, just a truncate.
- if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
- const auto &MMO = **LoadMI->memoperands_begin();
- // If truncating more than the original extended value, abort.
- if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
- return false;
- if (MMO.getSizeInBits() == SizeInBits)
- return true;
- }
- return false;
-}
-
-bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchSextInRegOfLoad(
- MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
-
- // Only supports scalars for now.
- if (MRI.getType(MI.getOperand(0).getReg()).isVector())
- return false;
-
Register SrcReg = MI.getOperand(1).getReg();
- MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
- if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
- return false;
-
- // If the sign extend extends from a narrower width than the load's width,
- // then we can narrow the load width when we combine to a G_SEXTLOAD.
- auto &MMO = **LoadDef->memoperands_begin();
- // Don't do this for non-simple loads.
- if (MMO.isAtomic() || MMO.isVolatile())
- return false;
-
- // Avoid widening the load at all.
- unsigned NewSizeBits =
- std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
-
- // Don't generate G_SEXTLOADs with a < 1 byte width.
- if (NewSizeBits < 8)
- return false;
- // Don't bother creating a non-power-2 sextload, it will likely be broken up
- // anyway for most targets.
- if (!isPowerOf2_32(NewSizeBits))
- return false;
- MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
- return true;
-}
-
-bool CombinerHelper::applySextInRegOfLoad(
- MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ Register LoadUser = SrcReg;
+
+ if (MRI.getType(SrcReg).isVector())
+ return false;
+
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
+ LoadUser = TruncSrc;
+
+ uint64_t SizeInBits = MI.getOperand(2).getImm();
+ // If the source is a G_SEXTLOAD from the same bit width, then we don't
+ // need any extend at all, just a truncate.
+ if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
+ const auto &MMO = **LoadMI->memoperands_begin();
+ // If truncating more than the original extended value, abort.
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
+ return false;
+ if (MMO.getSizeInBits() == SizeInBits)
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- Register LoadReg;
- unsigned ScalarSizeBits;
- std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
- auto *LoadDef = MRI.getVRegDef(LoadReg);
- assert(LoadDef && "Expected a load reg");
-
- // If we have the following:
- // %ld = G_LOAD %ptr, (load 2)
- // %ext = G_SEXT_INREG %ld, 8
- // ==>
- // %ld = G_SEXTLOAD %ptr (load 1)
-
- auto &MMO = **LoadDef->memoperands_begin();
- Builder.setInstrAndDebugLoc(MI);
- auto &MF = Builder.getMF();
- auto PtrInfo = MMO.getPointerInfo();
- auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
- Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
- LoadDef->getOperand(1).getReg(), *NewMMO);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.eraseFromParent();
return true;
}
+bool CombinerHelper::matchSextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ // Only supports scalars for now.
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ return false;
+ // The source must be a G_LOAD whose result has exactly one non-debug use.
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+ return false;
+
+ // If the sign extend extends from a narrower width than the load's width,
+ // then we can narrow the load width when we combine to a G_SEXTLOAD.
+ auto &MMO = **LoadDef->memoperands_begin();
+ // Don't do this for non-simple loads.
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Avoid widening the load at all: take the smaller of the extension width
+ // (operand 2) and the size of the existing memory access.
+ unsigned NewSizeBits =
+ std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+
+ // Don't generate G_SEXTLOADs with a < 1 byte width.
+ if (NewSizeBits < 8)
+ return false;
+ // Don't bother creating a non-power-2 sextload, it will likely be broken up
+ // anyway for most targets.
+ if (!isPowerOf2_32(NewSizeBits))
+ return false;
+ MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
+ return true; // MatchInfo = (load result register, new load width in bits).
+}
+
+bool CombinerHelper::applySextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register LoadReg;
+ unsigned ScalarSizeBits;
+ std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+ auto *LoadDef = MRI.getVRegDef(LoadReg);
+ assert(LoadDef && "Expected a load reg");
+
+ // If we have the following:
+ // %ld = G_LOAD %ptr, (load 2)
+ // %ext = G_SEXT_INREG %ld, 8
+ // ==>
+ // %ext = G_SEXTLOAD %ptr (load 1)
+ // The new load defines MI's result register and reuses the old load's pointer.
+ auto &MMO = **LoadDef->memoperands_begin();
+ Builder.setInstrAndDebugLoc(MI);
+ auto &MF = Builder.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); // Size in bytes.
+ Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+ LoadDef->getOperand(1).getReg(), *NewMMO);
+ MI.eraseFromParent(); // Only MI is erased here; LoadDef is left in place.
+ return true;
+}
+
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *MI.getParent()->getParent();
@@ -757,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
return false;
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
- // FIXME: The following use traversal needs a bail out for patholigical cases.
+ // FIXME: The following use traversal needs a bail out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -884,11 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
- return false;
-
+ // For now, no targets actually support these opcodes so don't waste time
+ // running these unless we're forced to for testing.
+ if (!ForceLegalIndexing)
+ return false;
+
MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
@@ -941,7 +941,7 @@ void CombinerHelper::applyCombineIndexedLoadStore(
LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
}
-bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_BR)
return false;
@@ -956,7 +956,7 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with conditional branch to bb3 and otherwise
- // fallthrough to bb2. This is generally better for branch predictors.
+ // fallthrough to bb2. This is generally better for branch predictors.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
@@ -968,36 +968,36 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
return false;
- // Check that the next block is the conditional branch target. Also make sure
- // that it isn't the same as the G_BR's target (otherwise, this will loop.)
- MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
- return BrCondTarget != MI.getOperand(0).getMBB() &&
- MBB->isLayoutSuccessor(BrCondTarget);
+ // Check that the next block is the conditional branch target. Also make sure
+ // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+ MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+ return BrCondTarget != MI.getOperand(0).getMBB() &&
+ MBB->isLayoutSuccessor(BrCondTarget);
}
-void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
- Builder.setInstrAndDebugLoc(*BrCond);
- LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
- // FIXME: Does int/fp matter for this? If so, we might need to restrict
- // this to i1 only since we might not know for sure what kind of
- // compare generated the condition value.
- auto True = Builder.buildConstant(
- Ty, getICmpTrueVal(getTargetLowering(), false, false));
- auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
-
- auto *FallthroughBB = BrCond->getOperand(1).getMBB();
- Observer.changingInstr(MI);
- MI.getOperand(0).setMBB(FallthroughBB);
- Observer.changedInstr(MI);
-
- // Change the conditional branch to use the inverted condition and
- // new target block.
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
+
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
+
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
Observer.changingInstr(*BrCond);
- BrCond->getOperand(0).setReg(Xor.getReg(0));
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
}
@@ -1090,7 +1090,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
@@ -1442,11 +1442,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
+ const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -1457,11 +1457,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
- if (Opc != TargetOpcode::G_MEMSET) {
+ if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
SrcAlign = MemOp->getBaseAlign();
@@ -1471,7 +1471,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
- unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
+ unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
@@ -1481,78 +1481,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (MaxLen && KnownLen > MaxLen)
return false;
- if (Opc == TargetOpcode::G_MEMCPY)
+ if (Opc == TargetOpcode::G_MEMCPY)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMMOVE)
+ if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
+ if (Opc == TargetOpcode::G_MEMSET)
return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
return false;
}
-static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
- const Register Op,
- const MachineRegisterInfo &MRI) {
- const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
- if (!MaybeCst)
- return None;
-
- APFloat V = MaybeCst->getValueAPF();
- switch (Opcode) {
- default:
- llvm_unreachable("Unexpected opcode!");
- case TargetOpcode::G_FNEG: {
- V.changeSign();
- return V;
- }
- case TargetOpcode::G_FABS: {
- V.clearSign();
- return V;
- }
- case TargetOpcode::G_FPTRUNC:
- break;
- case TargetOpcode::G_FSQRT: {
- bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(sqrt(V.convertToDouble()));
- break;
- }
- case TargetOpcode::G_FLOG2: {
- bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(log2(V.convertToDouble()));
- break;
- }
- }
- // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
- // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
- // and `G_FLOG2` reach here.
- bool Unused;
- V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
- return V;
-}
-
-bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
- Optional<APFloat> &Cst) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.hasValue();
-}
-
-bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
- Optional<APFloat> &Cst) {
- assert(Cst.hasValue() && "Optional is unexpectedly empty!");
- Builder.setInstrAndDebugLoc(MI);
- MachineFunction &MF = Builder.getMF();
- auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
- Register DstReg = MI.getOperand(0).getReg();
- Builder.buildFConstant(DstReg, *FPVal);
- MI.eraseFromParent();
- return true;
-}
-
+static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
+ const Register Op,
+ const MachineRegisterInfo &MRI) {
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
+ if (!MaybeCst)
+ return None;
+
+ APFloat V = MaybeCst->getValueAPF();
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_FNEG: {
+ V.changeSign();
+ return V;
+ }
+ case TargetOpcode::G_FABS: {
+ V.clearSign();
+ return V;
+ }
+ case TargetOpcode::G_FPTRUNC:
+ break;
+ case TargetOpcode::G_FSQRT: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(sqrt(V.convertToDouble()));
+ break;
+ }
+ case TargetOpcode::G_FLOG2: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(log2(V.convertToDouble()));
+ break;
+ }
+ }
+ // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
+ // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
+ // and `G_FLOG2` reach here.
+ bool Unused;
+ V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
+ return V;
+}
+
+bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
+ return Cst.hasValue();
+}
+
+bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ Builder.setInstrAndDebugLoc(MI);
+ MachineFunction &MF = Builder.getMF();
+ auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.buildFConstant(DstReg, *FPVal);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
PtrAddChain &MatchInfo) {
// We're trying to match the following pattern:
@@ -1581,7 +1581,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
MatchInfo.Base = Base;
return true;
}
@@ -1599,211 +1599,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
-bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
- RegisterImmPair &MatchInfo) {
- // We're trying to match the following pattern with any of
- // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
- // %t1 = SHIFT %base, G_CONSTANT imm1
- // %root = SHIFT %t1, G_CONSTANT imm2
- // -->
- // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
-
- unsigned Opcode = MI.getOpcode();
- assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
- Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
- Opcode == TargetOpcode::G_USHLSAT) &&
- "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
-
- Register Shl2 = MI.getOperand(1).getReg();
- Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
- if (!MaybeImmVal)
- return false;
-
- MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
- if (Shl2Def->getOpcode() != Opcode)
- return false;
-
- Register Base = Shl2Def->getOperand(1).getReg();
- Register Imm2 = Shl2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
- if (!MaybeImm2Val)
- return false;
-
- // Pass the combined immediate to the apply function.
- MatchInfo.Imm =
- (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
- MatchInfo.Reg = Base;
-
- // There is no simple replacement for a saturating unsigned left shift that
- // exceeds the scalar size.
- if (Opcode == TargetOpcode::G_USHLSAT &&
- MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
- return false;
-
- return true;
-}
-
-bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
- RegisterImmPair &MatchInfo) {
- unsigned Opcode = MI.getOpcode();
- assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
- Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
- Opcode == TargetOpcode::G_USHLSAT) &&
- "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
-
- Builder.setInstrAndDebugLoc(MI);
- LLT Ty = MRI.getType(MI.getOperand(1).getReg());
- unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
- auto Imm = MatchInfo.Imm;
-
- if (Imm >= ScalarSizeInBits) {
- // Any logical shift that exceeds scalar size will produce zero.
- if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
- Builder.buildConstant(MI.getOperand(0), 0);
- MI.eraseFromParent();
- return true;
- }
- // Arithmetic shift and saturating signed left shift have no effect beyond
- // scalar size.
- Imm = ScalarSizeInBits - 1;
- }
-
- LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
- Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(MatchInfo.Reg);
- MI.getOperand(2).setReg(NewImm);
- Observer.changedInstr(MI);
- return true;
-}
-
-bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
- ShiftOfShiftedLogic &MatchInfo) {
- // We're trying to match the following pattern with any of
- // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
- // with any of G_AND/G_OR/G_XOR logic instructions.
- // %t1 = SHIFT %X, G_CONSTANT C0
- // %t2 = LOGIC %t1, %Y
- // %root = SHIFT %t2, G_CONSTANT C1
- // -->
- // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
- // %t4 = SHIFT %Y, G_CONSTANT C1
- // %root = LOGIC %t3, %t4
- unsigned ShiftOpcode = MI.getOpcode();
- assert((ShiftOpcode == TargetOpcode::G_SHL ||
- ShiftOpcode == TargetOpcode::G_ASHR ||
- ShiftOpcode == TargetOpcode::G_LSHR ||
- ShiftOpcode == TargetOpcode::G_USHLSAT ||
- ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
- "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
-
- // Match a one-use bitwise logic op.
- Register LogicDest = MI.getOperand(1).getReg();
- if (!MRI.hasOneNonDBGUse(LogicDest))
- return false;
-
- MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
- unsigned LogicOpcode = LogicMI->getOpcode();
- if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
- LogicOpcode != TargetOpcode::G_XOR)
- return false;
-
- // Find a matching one-use shift by constant.
- const Register C1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
- if (!MaybeImmVal)
- return false;
-
- const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
-
- auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
- // Shift should match previous one and should be a one-use.
- if (MI->getOpcode() != ShiftOpcode ||
- !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
- return false;
-
- // Must be a constant.
- auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
- if (!MaybeImmVal)
- return false;
-
- ShiftVal = MaybeImmVal->Value.getSExtValue();
- return true;
- };
-
- // Logic ops are commutative, so check each operand for a match.
- Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
- MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
- Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
- MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
- uint64_t C0Val;
-
- if (matchFirstShift(LogicMIOp1, C0Val)) {
- MatchInfo.LogicNonShiftReg = LogicMIReg2;
- MatchInfo.Shift2 = LogicMIOp1;
- } else if (matchFirstShift(LogicMIOp2, C0Val)) {
- MatchInfo.LogicNonShiftReg = LogicMIReg1;
- MatchInfo.Shift2 = LogicMIOp2;
- } else
- return false;
-
- MatchInfo.ValSum = C0Val + C1Val;
-
- // The fold is not valid if the sum of the shift values exceeds bitwidth.
- if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
- return false;
-
- MatchInfo.Logic = LogicMI;
- return true;
-}
-
-bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
- ShiftOfShiftedLogic &MatchInfo) {
- unsigned Opcode = MI.getOpcode();
- assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
- Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
- Opcode == TargetOpcode::G_SSHLSAT) &&
- "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
-
- LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
- LLT DestType = MRI.getType(MI.getOperand(0).getReg());
- Builder.setInstrAndDebugLoc(MI);
-
- Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
-
- Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
- Register Shift1 =
- Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
-
- Register Shift2Const = MI.getOperand(2).getReg();
- Register Shift2 = Builder
- .buildInstr(Opcode, {DestType},
- {MatchInfo.LogicNonShiftReg, Shift2Const})
- .getReg(0);
-
- Register Dest = MI.getOperand(0).getReg();
- Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
-
- // These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
- MatchInfo.Logic->eraseFromParent();
-
- MI.eraseFromParent();
- return true;
-}
-
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
+ // %t1 = SHIFT %base, G_CONSTANT imm1
+ // %root = SHIFT %t1, G_CONSTANT imm2
+ // -->
+ // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
+
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Register Shl2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+ if (Shl2Def->getOpcode() != Opcode)
+ return false;
+
+ Register Base = Shl2Def->getOperand(1).getReg();
+ Register Imm2 = Shl2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm =
+ (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Reg = Base;
+
+ // There is no simple replacement for a saturating unsigned left shift that
+ // exceeds the scalar size.
+ if (Opcode == TargetOpcode::G_USHLSAT &&
+ MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
+ return false;
+
+ return true;
+}
+
+bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Builder.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+ auto Imm = MatchInfo.Imm;
+
+ if (Imm >= ScalarSizeInBits) {
+ // Any logical shift that exceeds scalar size will produce zero.
+ if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+ Builder.buildConstant(MI.getOperand(0), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ // Arithmetic shift and saturating signed left shift have no effect beyond
+ // scalar size.
+ Imm = ScalarSizeInBits - 1;
+ }
+
+ LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+ Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Reg);
+ MI.getOperand(2).setReg(NewImm);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+ // with any of G_AND/G_OR/G_XOR logic instructions.
+ // %t1 = SHIFT %X, G_CONSTANT C0
+ // %t2 = LOGIC %t1, %Y
+ // %root = SHIFT %t2, G_CONSTANT C1
+ // -->
+ // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+ // %t4 = SHIFT %Y, G_CONSTANT C1
+ // %root = LOGIC %t3, %t4
+ unsigned ShiftOpcode = MI.getOpcode();
+ assert((ShiftOpcode == TargetOpcode::G_SHL ||
+ ShiftOpcode == TargetOpcode::G_ASHR ||
+ ShiftOpcode == TargetOpcode::G_LSHR ||
+ ShiftOpcode == TargetOpcode::G_USHLSAT ||
+ ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ // Match a one-use bitwise logic op.
+ Register LogicDest = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LogicDest))
+ return false;
+
+ MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+ unsigned LogicOpcode = LogicMI->getOpcode();
+ if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+ LogicOpcode != TargetOpcode::G_XOR)
+ return false;
+
+ // Find a matching one-use shift by constant.
+ const Register C1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
+
+ auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+ // Shift should match previous one and should be a one-use.
+ if (MI->getOpcode() != ShiftOpcode ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ // Must be a constant.
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+ MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+ Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+ MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+ uint64_t C0Val;
+
+ if (matchFirstShift(LogicMIOp1, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg2;
+ MatchInfo.Shift2 = LogicMIOp1;
+ } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg1;
+ MatchInfo.Shift2 = LogicMIOp2;
+ } else
+ return false;
+
+ MatchInfo.ValSum = C0Val + C1Val;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+ return false;
+
+ MatchInfo.Logic = LogicMI;
+ return true;
+}
+
+bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+ Opcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+ LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+ Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+ Register Shift1 =
+ Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+ Register Shift2Const = MI.getOperand(2).getReg();
+ Register Shift2 = Builder
+ .buildInstr(Opcode, {DestType},
+ {MatchInfo.LogicNonShiftReg, Shift2Const})
+ .getReg(0);
+
+ Register Dest = MI.getOperand(0).getReg();
+ Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+ // These were one use so it's safe to remove them.
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
- if (!MaybeImmVal)
+ if (!MaybeImmVal)
return false;
-
- ShiftVal = MaybeImmVal->Value.exactLogBase2();
- return (static_cast<int32_t>(ShiftVal) != -1);
+
+ ShiftVal = MaybeImmVal->Value.exactLogBase2();
+ return (static_cast<int32_t>(ShiftVal) != -1);
}
bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
@@ -1819,254 +1819,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
return true;
}
-// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
-bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
- RegisterImmPair &MatchData) {
- assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
-
- Register LHS = MI.getOperand(1).getReg();
-
- Register ExtSrc;
- if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
- !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
- !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
- return false;
-
- // TODO: Should handle vector splat.
- Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
- if (!MaybeShiftAmtVal)
- return false;
-
- if (LI) {
- LLT SrcTy = MRI.getType(ExtSrc);
-
- // We only really care about the legality with the shifted value. We can
- // pick any type the constant shift amount, so ask the target what to
- // use. Otherwise we would have to guess and hope it is reported as legal.
- LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
- return false;
- }
-
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
- MatchData.Reg = ExtSrc;
- MatchData.Imm = ShiftAmt;
-
- unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
- return MinLeadingZeros >= ShiftAmt;
-}
-
-bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
- const RegisterImmPair &MatchData) {
- Register ExtSrcReg = MatchData.Reg;
- int64_t ShiftAmtVal = MatchData.Imm;
-
- LLT ExtSrcTy = MRI.getType(ExtSrcReg);
- Builder.setInstrAndDebugLoc(MI);
- auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
- auto NarrowShift =
- Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
- Builder.buildZExt(MI.getOperand(0), NarrowShift);
- MI.eraseFromParent();
- return true;
-}
-
-static Register peekThroughBitcast(Register Reg,
- const MachineRegisterInfo &MRI) {
- while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
- ;
-
- return Reg;
-}
-
-bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
- MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
- Register SrcReg =
- peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
-
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
- SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
- SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
- return false;
-
- // Check the source type of the merge.
- LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
- bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
- if (SrcMergeTy != Dst0Ty && !SameSize)
- return false;
- // They are the same now (modulo a bitcast).
- // We can collect all the src registers.
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
- ++Idx)
- Operands.push_back(SrcInstr->getOperand(Idx).getReg());
- return true;
-}
-
-bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
- MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
- assert((MI.getNumOperands() - 1 == Operands.size()) &&
- "Not enough operands to replace all defs");
- unsigned NumElems = MI.getNumOperands() - 1;
-
- LLT SrcTy = MRI.getType(Operands[0]);
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- bool CanReuseInputDirectly = DstTy == SrcTy;
- Builder.setInstrAndDebugLoc(MI);
- for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
- Register DstReg = MI.getOperand(Idx).getReg();
- Register SrcReg = Operands[Idx];
- if (CanReuseInputDirectly)
- replaceRegWith(MRI, DstReg, SrcReg);
- else
- Builder.buildCast(DstReg, SrcReg);
- }
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
- SmallVectorImpl<APInt> &Csts) {
- unsigned SrcIdx = MI.getNumOperands() - 1;
- Register SrcReg = MI.getOperand(SrcIdx).getReg();
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
- SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
- return false;
- // Break down the big constant in smaller ones.
- const MachineOperand &CstVal = SrcInstr->getOperand(1);
- APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
- ? CstVal.getCImm()->getValue()
- : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
-
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
- unsigned ShiftAmt = Dst0Ty.getSizeInBits();
- // Unmerge a constant.
- for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
- Csts.emplace_back(Val.trunc(ShiftAmt));
- Val = Val.lshr(ShiftAmt);
- }
-
- return true;
-}
-
-bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
- SmallVectorImpl<APInt> &Csts) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
- assert((MI.getNumOperands() - 1 == Csts.size()) &&
- "Not enough operands to replace all defs");
- unsigned NumElems = MI.getNumOperands() - 1;
- Builder.setInstrAndDebugLoc(MI);
- for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
- Register DstReg = MI.getOperand(Idx).getReg();
- Builder.buildConstant(DstReg, Csts[Idx]);
- }
-
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
- // Check that all the lanes are dead except the first one.
- for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
- if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
- return false;
- }
- return true;
-}
-
-bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
- Builder.setInstrAndDebugLoc(MI);
- Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
- // Truncating a vector is going to truncate every single lane,
- // whereas we want the full lowbits.
- // Do the operation on a scalar instead.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- SrcReg =
- Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
-
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT Dst0Ty = MRI.getType(Dst0Reg);
- if (Dst0Ty.isVector()) {
- auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
- Builder.buildCast(Dst0Reg, MIB);
- } else
- Builder.buildTrunc(Dst0Reg, SrcReg);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT Dst0Ty = MRI.getType(Dst0Reg);
- // G_ZEXT on vector applies to each lane, so it will
- // affect all destinations. Therefore we won't be able
- // to simplify the unmerge to just the first definition.
- if (Dst0Ty.isVector())
- return false;
- Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- return false;
-
- Register ZExtSrcReg;
- if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
- return false;
-
- // Finally we can replace the first definition with
- // a zext of the source if the definition is big enough to hold
- // all of ZExtSrc bits.
- LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
- return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
-}
-
-bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- "Expected an unmerge");
-
- Register Dst0Reg = MI.getOperand(0).getReg();
-
- MachineInstr *ZExtInstr =
- MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
- assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
- "Expecting a G_ZEXT");
-
- Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
- LLT Dst0Ty = MRI.getType(Dst0Reg);
- LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
-
- Builder.setInstrAndDebugLoc(MI);
-
- if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
- Builder.buildZExt(Dst0Reg, ZExtSrcReg);
- } else {
- assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
- "ZExt src doesn't fit in destination");
- replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
- }
-
- Register ZeroReg;
- for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
- if (!ZeroReg)
- ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
- replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
- }
- MI.eraseFromParent();
- return true;
-}
-
+// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
+bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
+ RegisterImmPair &MatchData) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+
+ Register LHS = MI.getOperand(1).getReg();
+
+ Register ExtSrc;
+ if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
+ return false;
+
+ // TODO: Should handle vector splat.
+ Register RHS = MI.getOperand(2).getReg();
+ auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!MaybeShiftAmtVal)
+ return false;
+
+ if (LI) {
+ LLT SrcTy = MRI.getType(ExtSrc);
+
+ // We only really care about the legality with the shifted value. We can
+ // pick any type the constant shift amount, so ask the target what to
+ // use. Otherwise we would have to guess and hope it is reported as legal.
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
+ return false;
+ }
+
+ int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ MatchData.Reg = ExtSrc;
+ MatchData.Imm = ShiftAmt;
+
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
+ return MinLeadingZeros >= ShiftAmt;
+}
+
+bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData) {
+ Register ExtSrcReg = MatchData.Reg;
+ int64_t ShiftAmtVal = MatchData.Imm;
+
+ LLT ExtSrcTy = MRI.getType(ExtSrcReg);
+ Builder.setInstrAndDebugLoc(MI);
+ auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
+ auto NarrowShift =
+ Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
+ Builder.buildZExt(MI.getOperand(0), NarrowShift);
+ MI.eraseFromParent();
+ return true;
+}
+
+static Register peekThroughBitcast(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+ ;
+
+ return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register SrcReg =
+ peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
+ SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ return false;
+
+ // Check the source type of the merge.
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+ if (SrcMergeTy != Dst0Ty && !SameSize)
+ return false;
+ // They are the same now (modulo a bitcast).
+ // We can collect all the src registers.
+ for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
+ ++Idx)
+ Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ return true;
+}
+
+/// Rewrite the G_UNMERGE_VALUES \p MI so that each of its results comes from
+/// the register at the same position in \p Operands, then erase \p MI.
+/// Results are either replaced by the operand directly or produced by a cast
+/// from it. Always returns true.
+bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Operands.size()) &&
+ "Not enough operands to replace all defs");
+ // Every operand of the unmerge except the last one is a result.
+ unsigned NumElems = MI.getNumOperands() - 1;
+
+ // Only the first operand/result pair is compared; the outcome decides how
+ // every pair is handled.
+ LLT SrcTy = MRI.getType(Operands[0]);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ bool CanReuseInputDirectly = DstTy == SrcTy;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = Operands[Idx];
+ if (CanReuseInputDirectly)
+ replaceRegWith(MRI, DstReg, SrcReg);
+ else
+ // Types differ: define the result with a cast of the operand.
+ Builder.buildCast(DstReg, SrcReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Match an instruction whose last operand is defined by a G_CONSTANT or
+/// G_FCONSTANT. On success, \p Csts receives one APInt per leading operand
+/// of \p MI: the constant's bits are sliced, lowest bits first, into pieces
+/// as wide as the type of operand 0.
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  const unsigned NumPieces = MI.getNumOperands() - 1;
+  MachineInstr *CstDef = MRI.getVRegDef(MI.getOperand(NumPieces).getReg());
+  const unsigned CstOpc = CstDef->getOpcode();
+  const bool IsIntCst = CstOpc == TargetOpcode::G_CONSTANT;
+  if (!IsIntCst && CstOpc != TargetOpcode::G_FCONSTANT)
+    return false;
+
+  // Fetch the constant as raw bits; FP constants are bitcast to an integer.
+  const MachineOperand &Imm = CstDef->getOperand(1);
+  APInt Remaining = IsIntCst
+                        ? Imm.getCImm()->getValue()
+                        : Imm.getFPImm()->getValueAPF().bitcastToAPInt();
+
+  const unsigned PieceBits =
+      MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+  // Peel off the low PieceBits bits, then shift them out, once per piece.
+  for (unsigned Piece = 0; Piece < NumPieces; ++Piece) {
+    Csts.emplace_back(Remaining.trunc(PieceBits));
+    Remaining = Remaining.lshr(PieceBits);
+  }
+
+  return true;
+}
+
+/// Replace the G_UNMERGE_VALUES \p MI by one constant per result, taking the
+/// value for result i from \p Csts[i], and erase \p MI. Always returns true.
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  assert((MI.getNumOperands() - 1 == Csts.size()) &&
+         "Not enough operands to replace all defs");
+  // All operands but the last are results of the unmerge.
+  const unsigned NumResults = MI.getNumOperands() - 1;
+  Builder.setInstrAndDebugLoc(MI);
+  unsigned ResultIdx = 0;
+  while (ResultIdx < NumResults) {
+    Builder.buildConstant(MI.getOperand(ResultIdx).getReg(), Csts[ResultIdx]);
+    ++ResultIdx;
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Return true when every result of the G_UNMERGE_VALUES \p MI other than
+/// the first has no non-debug uses.
+bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  // Result 0 may be live; look for any later result that is still used.
+  const unsigned NumDefs = MI.getNumDefs();
+  unsigned DefIdx = 1;
+  while (DefIdx != NumDefs) {
+    const bool LaneIsDead =
+        MRI.use_nodbg_empty(MI.getOperand(DefIdx).getReg());
+    if (!LaneIsDead)
+      return false;
+    ++DefIdx;
+  }
+  return true;
+}
+
+/// Replace \p MI by a truncation of its source into its first result and
+/// erase \p MI. Vector sources and vector results are routed through a
+/// scalar of the same total bit size. Always returns true.
+bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ Builder.setInstrAndDebugLoc(MI);
+ // The source operand follows the definitions.
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ // Truncating a vector is going to truncate every single lane,
+ // whereas we want the full lowbits.
+ // Do the operation on a scalar instead.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ SrcReg =
+ Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ if (Dst0Ty.isVector()) {
+ // Truncate to a scalar as wide as the result, then cast that scalar into
+ // the vector-typed result register.
+ auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
+ Builder.buildCast(Dst0Reg, MIB);
+ } else
+ Builder.buildTrunc(Dst0Reg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Match a scalar G_UNMERGE_VALUES whose scalar source is a G_ZEXT of a value
+/// that is no wider than the first result of the unmerge.
+bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  // G_ZEXT on a vector extends each lane and therefore touches every
+  // destination, so the unmerge cannot be reduced to its first definition.
+  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+  if (Dst0Ty.isVector())
+    return false;
+
+  Register UnmergeSrc = MI.getOperand(MI.getNumDefs()).getReg();
+  if (MRI.getType(UnmergeSrc).isVector())
+    return false;
+
+  Register ZExtSrcReg;
+  if (mi_match(UnmergeSrc, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) {
+    // The first definition can take over only if it is wide enough to hold
+    // every bit of the zext's source.
+    return MRI.getType(ZExtSrcReg).getSizeInBits() <= Dst0Ty.getSizeInBits();
+  }
+  return false;
+}
+
+/// Rewrite an unmerge of a G_ZEXT: the first result becomes the zext's source
+/// (zero-extended again when the result is wider), every other result is
+/// replaced by a zero constant, and \p MI is erased. Always returns true.
+bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+
+ // The unmerge source (the operand after the definitions) must be a G_ZEXT.
+ MachineInstr *ZExtInstr =
+ MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
+ assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
+ "Expecting a G_ZEXT");
+
+ Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
+ Builder.buildZExt(Dst0Reg, ZExtSrcReg);
+ } else {
+ // Same width: the first result is simply the zext's source.
+ assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
+ "ZExt src doesn't fit in destination");
+ replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
+ }
+
+ // A single zero constant of the first result's type is created on first
+ // need and shared by all remaining results.
+ Register ZeroReg;
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!ZeroReg)
+ ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
+ replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
@@ -2088,7 +2088,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
if (!MaybeImmVal)
return false;
- ShiftVal = MaybeImmVal->Value.getSExtValue();
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
@@ -2177,296 +2177,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
return false;
}
-bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Register SrcReg = MI.getOperand(1).getReg();
- return mi_match(SrcReg, MRI,
- m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
-}
-
-bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
- Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstr(MI);
- Builder.buildCopy(DstReg, Reg);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
- Register SrcReg = MI.getOperand(1).getReg();
- return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
-}
-
-bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
- Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstr(MI);
- Builder.buildZExtOrTrunc(DstReg, Reg);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineAddP2IToPtrAdd(
- MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
- assert(MI.getOpcode() == TargetOpcode::G_ADD);
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT IntTy = MRI.getType(LHS);
-
- // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
- // instruction.
- PtrReg.second = false;
- for (Register SrcReg : {LHS, RHS}) {
- if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
- // Don't handle cases where the integer is implicitly converted to the
- // pointer width.
- LLT PtrTy = MRI.getType(PtrReg.first);
- if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
- return true;
- }
-
- PtrReg.second = true;
- }
-
- return false;
-}
-
-bool CombinerHelper::applyCombineAddP2IToPtrAdd(
- MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
- Register Dst = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
-
- const bool DoCommute = PtrReg.second;
- if (DoCommute)
- std::swap(LHS, RHS);
- LHS = PtrReg.first;
-
- LLT PtrTy = MRI.getType(LHS);
-
- Builder.setInstrAndDebugLoc(MI);
- auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
- Builder.buildPtrToInt(Dst, PtrAdd);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
- int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
-
- if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
- int64_t Cst;
- if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
- NewCst = Cst + *RHSCst;
- return true;
- }
- }
-
- return false;
-}
-
-bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
- int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register Dst = MI.getOperand(0).getReg();
-
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildConstant(Dst, NewCst);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- return mi_match(SrcReg, MRI,
- m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
-}
-
-bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
- Register DstReg = MI.getOperand(0).getReg();
- MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, Reg);
- return true;
-}
-
-bool CombinerHelper::matchCombineExtOfExt(
- MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
- assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
- MI.getOpcode() == TargetOpcode::G_SEXT ||
- MI.getOpcode() == TargetOpcode::G_ZEXT) &&
- "Expected a G_[ASZ]EXT");
- Register SrcReg = MI.getOperand(1).getReg();
- MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
- // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
- unsigned Opc = MI.getOpcode();
- unsigned SrcOpc = SrcMI->getOpcode();
- if (Opc == SrcOpc ||
- (Opc == TargetOpcode::G_ANYEXT &&
- (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
- (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
- MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
- return true;
- }
- return false;
-}
-
-bool CombinerHelper::applyCombineExtOfExt(
- MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
- assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
- MI.getOpcode() == TargetOpcode::G_SEXT ||
- MI.getOpcode() == TargetOpcode::G_ZEXT) &&
- "Expected a G_[ASZ]EXT");
-
- Register Reg = std::get<0>(MatchInfo);
- unsigned SrcExtOp = std::get<1>(MatchInfo);
-
- // Combine exts with the same opcode.
- if (MI.getOpcode() == SrcExtOp) {
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Reg);
- Observer.changedInstr(MI);
- return true;
- }
-
- // Combine:
- // - anyext([sz]ext x) to [sz]ext x
- // - sext(zext x) to zext x
- if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
- (MI.getOpcode() == TargetOpcode::G_SEXT &&
- SrcExtOp == TargetOpcode::G_ZEXT)) {
- Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
- MI.eraseFromParent();
- return true;
- }
-
- return false;
-}
-
-bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
- MI.getFlags());
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
- Register SrcReg = MI.getOperand(1).getReg();
- return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
-}
-
-bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Src = MI.getOperand(1).getReg();
- Register AbsSrc;
- return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
-}
-
-bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Register Dst = MI.getOperand(0).getReg();
- MI.eraseFromParent();
- replaceRegWith(MRI, Dst, Src);
- return true;
-}
-
-bool CombinerHelper::matchCombineTruncOfExt(
- MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
- Register SrcReg = MI.getOperand(1).getReg();
- MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
- unsigned SrcOpc = SrcMI->getOpcode();
- if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
- SrcOpc == TargetOpcode::G_ZEXT) {
- MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
- return true;
- }
- return false;
-}
-
-bool CombinerHelper::applyCombineTruncOfExt(
- MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
- Register SrcReg = MatchInfo.first;
- unsigned SrcExtOp = MatchInfo.second;
- Register DstReg = MI.getOperand(0).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(DstReg);
- if (SrcTy == DstTy) {
- MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, SrcReg);
- return true;
- }
- Builder.setInstrAndDebugLoc(MI);
- if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
- Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
- else
- Builder.buildTrunc(DstReg, SrcReg);
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchCombineTruncOfShl(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Register ShiftSrc;
- Register ShiftAmt;
-
- if (MRI.hasOneNonDBGUse(SrcReg) &&
- mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
- isLegalOrBeforeLegalizer(
- {TargetOpcode::G_SHL,
- {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
- KnownBits Known = KB->getKnownBits(ShiftAmt);
- unsigned Size = DstTy.getSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
- MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
- return true;
- }
- }
- return false;
-}
-
-bool CombinerHelper::applyCombineTruncOfShl(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
-
- Register ShiftSrc = MatchInfo.first;
- Register ShiftAmt = MatchInfo.second;
- Builder.setInstrAndDebugLoc(MI);
- auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
- Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
- MI.eraseFromParent();
- return true;
-}
-
+/// Match G_INTTOPTR (G_PTRTOINT x) where x has exactly the type of the
+/// G_INTTOPTR result; x is returned in \p Reg.
+bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
+  Register IntSrc = MI.getOperand(1).getReg();
+  auto SameTypedReg = m_all_of(m_SpecificType(ResultTy), m_Reg(Reg));
+  return mi_match(IntSrc, MRI, m_GPtrToInt(SameTypedReg));
+}
+
+/// Replace the G_INTTOPTR \p MI by a COPY of \p Reg into its result register
+/// and erase \p MI. Always returns true.
+bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+  Register DstReg = MI.getOperand(0).getReg();
+  // Use the debug-location-preserving setter so the COPY inherits MI's
+  // location, consistent with the other apply helpers in this file
+  // (e.g. applyCombineAddP2IToPtrAdd).
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildCopy(DstReg, Reg);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Match G_PTRTOINT (G_INTTOPTR x); x is returned in \p Reg.
+bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+  return mi_match(MI.getOperand(1).getReg(), MRI, m_GIntToPtr(m_Reg(Reg)));
+}
+
+/// Replace the G_PTRTOINT \p MI by a zero-extension or truncation of \p Reg
+/// into its result register and erase \p MI. Always returns true.
+bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+  Register DstReg = MI.getOperand(0).getReg();
+  // Use the debug-location-preserving setter so the new instruction inherits
+  // MI's location, consistent with the other apply helpers in this file
+  // (e.g. applyCombineAddP2IToPtrAdd).
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildZExtOrTrunc(DstReg, Reg);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Match a G_ADD where one operand is a G_PTRTOINT of a pointer whose scalar
+/// size equals the scalar size of the add's integer type.
+/// On success \p PtrReg.first is the pointer register and \p PtrReg.second is
+/// false when the G_PTRTOINT was operand 1 (LHS) and true when it was
+/// operand 2 (RHS).
+/// NOTE(review): \p PtrReg.first may be overwritten even when this returns
+/// false, because the matcher binds it before the size check.
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT IntTy = MRI.getType(LHS);
+
+ // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+ // instruction.
+ PtrReg.second = false;
+ for (Register SrcReg : {LHS, RHS}) {
+ if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+ // Don't handle cases where the integer is implicitly converted to the
+ // pointer width.
+ LLT PtrTy = MRI.getType(PtrReg.first);
+ if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+ return true;
+ }
+
+ // LHS did not qualify; a match on the second iteration (RHS) means the
+ // operands have to be commuted.
+ PtrReg.second = true;
+ }
+
+ return false;
+}
+
+/// Replace the add \p MI by a G_PTR_ADD on the pointer in \p PtrReg.first
+/// followed by a G_PTRTOINT into the original result, then erase \p MI.
+/// \p PtrReg.second says whether the pointer side was operand 2 (true) or
+/// operand 1 (false) of the add. Always returns true.
+bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register AddLHS = MI.getOperand(1).getReg();
+  Register AddRHS = MI.getOperand(2).getReg();
+
+  // The offset is whichever add operand was not the pointer side.
+  const bool PtrWasRHS = PtrReg.second;
+  Register Offset = PtrWasRHS ? AddLHS : AddRHS;
+  Register Ptr = PtrReg.first;
+
+  LLT PtrTy = MRI.getType(Ptr);
+
+  Builder.setInstrAndDebugLoc(MI);
+  auto PtrAdd = Builder.buildPtrAdd(PtrTy, Ptr, Offset);
+  Builder.buildPtrToInt(Dst, PtrAdd);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Match G_PTR_ADD (G_INTTOPTR C1), C2 where both C1 and C2 are constants.
+/// On success \p NewCst is set to C1 + C2.
+bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ // NOTE(review): this local shadows the MRI name the sibling functions use
+ // directly; presumably both refer to the same MachineRegisterInfo - confirm.
+ MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
+
+ if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ int64_t Cst;
+ if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
+ // NOTE(review): plain int64_t addition; overflow is not checked here.
+ NewCst = Cst + *RHSCst;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Replace the G_PTR_ADD \p MI by a constant with value \p NewCst built
+/// directly into its result register, then erase \p MI. Always returns true.
+bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register Dst = MI.getOperand(0).getReg();
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildConstant(Dst, NewCst);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Match G_ANYEXT (G_TRUNC x) where x has exactly the type of the G_ANYEXT
+/// result; x is returned in \p Reg.
+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
+  Register ExtSrc = MI.getOperand(1).getReg();
+  auto SameTypedReg = m_all_of(m_Reg(Reg), m_SpecificType(ResultTy));
+  return mi_match(ExtSrc, MRI, m_GTrunc(SameTypedReg));
+}
+
+/// Erase the G_ANYEXT \p MI and replace its result register with \p Reg.
+/// Always returns true.
+bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ // Read the result register before MI is erased.
+ Register DstReg = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, Reg);
+ return true;
+}
+
+/// Match an extension of an extension that can be folded: both have the same
+/// opcode, or the outer is G_ANYEXT over G_SEXT/G_ZEXT, or the outer is
+/// G_SEXT over G_ZEXT. On success \p MatchInfo holds the inner extension's
+/// source register and the inner opcode.
+bool CombinerHelper::matchCombineExtOfExt(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+          MI.getOpcode() == TargetOpcode::G_SEXT ||
+          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+         "Expected a G_[ASZ]EXT");
+  MachineInstr *InnerMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+  const unsigned OuterOpc = MI.getOpcode();
+  const unsigned InnerOpc = InnerMI->getOpcode();
+
+  const bool InnerIsSOrZExt = InnerOpc == TargetOpcode::G_SEXT ||
+                              InnerOpc == TargetOpcode::G_ZEXT;
+  const bool SameOpcode = OuterOpc == InnerOpc;
+  const bool AnyExtOfSOrZExt =
+      OuterOpc == TargetOpcode::G_ANYEXT && InnerIsSOrZExt;
+  const bool SExtOfZExt = OuterOpc == TargetOpcode::G_SEXT &&
+                          InnerOpc == TargetOpcode::G_ZEXT;
+  if (!SameOpcode && !AnyExtOfSOrZExt && !SExtOfZExt)
+    return false;
+
+  MatchInfo = std::make_tuple(InnerMI->getOperand(1).getReg(), InnerOpc);
+  return true;
+}
+
+/// Fold an extension of an extension using \p MatchInfo (inner source
+/// register, inner extension opcode). When both opcodes agree, \p MI is
+/// updated in place to read the inner source; for anyext-of-[sz]ext and
+/// sext-of-zext, \p MI is replaced by a new instruction with the inner opcode.
+/// Returns false if neither case applies.
+bool CombinerHelper::applyCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+
+ Register Reg = std::get<0>(MatchInfo);
+ unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+ // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) {
+ // In-place operand change, bracketed by observer notifications.
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Reg);
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ // Combine:
+ // - anyext([sz]ext x) to [sz]ext x
+ // - sext(zext x) to zext x
+ if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ (MI.getOpcode() == TargetOpcode::G_SEXT &&
+ SrcExtOp == TargetOpcode::G_ZEXT)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+/// Replace the G_MUL \p MI by a subtraction of its first source operand from
+/// a zero constant (0 - x), keeping \p MI's flags, then erase \p MI.
+/// Only operand 1 is read here; the name implies the other operand is -1,
+/// which the caller is expected to have matched. Always returns true.
+bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
+ MI.getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Match G_FNEG (G_FNEG x); x is returned in \p Reg.
+bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
+  return mi_match(MI.getOperand(1).getReg(), MRI, m_GFNeg(m_Reg(Reg)));
+}
+
+/// Match G_FABS whose source is itself matched as a G_FABS.
+/// \p Src is set to the source operand of \p MI (the inner G_FABS result, not
+/// the inner G_FABS's own source), and is written even when the match fails.
+bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Src = MI.getOperand(1).getReg();
+ // Only needed to satisfy the matcher; the bound value is not used.
+ Register AbsSrc;
+ return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
+}
+
+/// Erase the G_FABS \p MI and replace its result register with \p Src.
+/// Always returns true.
+bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ // Read the result register before MI is erased.
+ Register Dst = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, Dst, Src);
+ return true;
+}
+
+/// Match a G_TRUNC whose source is defined by G_ANYEXT, G_SEXT or G_ZEXT.
+/// On success \p MatchInfo holds the extension's source register and the
+/// extension opcode.
+bool CombinerHelper::matchCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  MachineInstr *ExtMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+  const unsigned ExtOpc = ExtMI->getOpcode();
+  switch (ExtOpc) {
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+    MatchInfo = std::make_pair(ExtMI->getOperand(1).getReg(), ExtOpc);
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Fold a G_TRUNC of an extension using \p MatchInfo (extension source
+/// register, extension opcode). Depending on how the source type compares to
+/// the trunc's result type, the result is the source itself, a narrower
+/// extension of the source, or a truncation of the source. \p MI is erased.
+/// Always returns true.
+bool CombinerHelper::applyCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MatchInfo.first;
+ unsigned SrcExtOp = MatchInfo.second;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+ // Identical types: the extension and truncation cancel out.
+ if (SrcTy == DstTy) {
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+ return true;
+ }
+ Builder.setInstrAndDebugLoc(MI);
+ // Source narrower than the result: re-emit the same kind of extension.
+ // Otherwise truncate the source straight to the result.
+ if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+ Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+ else
+ Builder.buildTrunc(DstReg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Match G_TRUNC (G_SHL x, amt) where the shift has a single non-debug use,
+/// a G_SHL on the truncated type is legal (or we are before the legalizer),
+/// and the known bits of amt bound it tightly enough for the narrow type.
+/// On success \p MatchInfo holds (x, amt).
+bool CombinerHelper::matchCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register ShiftSrc;
+ Register ShiftAmt;
+
+ if (MRI.hasOneNonDBGUse(SrcReg) &&
+ mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+ isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SHL,
+ {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
+ KnownBits Known = KB->getKnownBits(ShiftAmt);
+ unsigned Size = DstTy.getSizeInBits();
+ // Number of possibly-set low bits of the shift amount must not exceed
+ // log2 of the truncated size.
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Rewrite G_TRUNC (G_SHL x, amt) as G_SHL (G_TRUNC x), amt using the
+/// (x, amt) pair in \p MatchInfo. The new shift carries the flags of the
+/// instruction defining the trunc's source, and \p MI is erased.
+/// Always returns true.
+bool CombinerHelper::applyCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ // Defining instruction of the trunc source; only its flags are reused.
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+ Register ShiftSrc = MatchInfo.first;
+ Register ShiftAmt = MatchInfo.second;
+ Builder.setInstrAndDebugLoc(MI);
+ auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
+ Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
return MO.isReg() &&
@@ -2493,22 +2493,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
MRI);
}
-bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
- MRI);
-}
-
-bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- if (auto MaybeCstCmp =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
- OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
- return true;
- }
- return false;
-}
-
+/// Return true when operand 1 of the G_SELECT \p MI (its condition) is
+/// found by getOpcodeDef to come from a G_IMPLICIT_DEF.
+bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
+ MRI);
+}
+
+/// Match a G_SELECT whose condition (operand 1) is a constant. On success
+/// \p OpIdx is 3 when the constant is zero and 2 otherwise.
+bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
+  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+  auto CondCst =
+      getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+  if (!CondCst)
+    return false;
+
+  if (CondCst->Value.isNullValue())
+    OpIdx = 3;
+  else
+    OpIdx = 2;
+  return true;
+}
+
bool CombinerHelper::eraseInst(MachineInstr &MI) {
MI.eraseFromParent();
return true;
@@ -2605,16 +2605,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
return true;
}
-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
- Register Replacement) {
- assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
- Register OldReg = MI.getOperand(0).getReg();
- assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
- MI.eraseFromParent();
- replaceRegWith(MRI, OldReg, Replacement);
- return true;
-}
-
+/// Erase \p MI, which must have exactly one explicit def, and replace that
+/// def's register with \p Replacement. Always returns true.
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+ Register Replacement) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ // Read the result register before MI is erased.
+ Register OldReg = MI.getOperand(0).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -2635,18 +2635,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
MRI);
}
-bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- return MO.isReg() &&
- getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
-}
-
-bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
- unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
-}
-
+/// Return true when operand \p OpIdx of \p MI is a register that
+/// getOpcodeDef resolves to a G_IMPLICIT_DEF. Non-register operands yield
+/// false.
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
+/// Forward the register of operand \p OpIdx of \p MI to
+/// isKnownToBeAPowerOfTwo and return its answer.
+bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
+                                                        unsigned OpIdx) {
+  Register OperandReg = MI.getOperand(OpIdx).getReg();
+  return isKnownToBeAPowerOfTwo(OperandReg, MRI, KB);
+}
+
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -2682,7 +2682,7 @@ bool CombinerHelper::matchSimplifyAddToSub(
// ((0-A) + B) -> B - A
// (A + (0-B)) -> A - B
auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
- if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
+ if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
return false;
NewLHS = MaybeNewLHS;
return true;
@@ -2691,67 +2691,67 @@ bool CombinerHelper::matchSimplifyAddToSub(
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
-bool CombinerHelper::matchCombineInsertVecElts(
- MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
- "Invalid opcode");
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
- unsigned NumElts = DstTy.getNumElements();
- // If this MI is part of a sequence of insert_vec_elts, then
- // don't do the combine in the middle of the sequence.
- if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
- TargetOpcode::G_INSERT_VECTOR_ELT)
- return false;
- MachineInstr *CurrInst = &MI;
- MachineInstr *TmpInst;
- int64_t IntImm;
- Register TmpReg;
- MatchInfo.resize(NumElts);
- while (mi_match(
- CurrInst->getOperand(0).getReg(), MRI,
- m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
- if (IntImm >= NumElts)
- return false;
- if (!MatchInfo[IntImm])
- MatchInfo[IntImm] = TmpReg;
- CurrInst = TmpInst;
- }
- // Variable index.
- if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
- return false;
- if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
- for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
- if (!MatchInfo[I - 1].isValid())
- MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
- }
- return true;
- }
- // If we didn't end in a G_IMPLICIT_DEF, bail out.
- return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
-}
-
-bool CombinerHelper::applyCombineInsertVecElts(
- MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
- Builder.setInstr(MI);
- Register UndefReg;
- auto GetUndef = [&]() {
- if (UndefReg)
- return UndefReg;
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
- return UndefReg;
- };
- for (unsigned I = 0; I < MatchInfo.size(); ++I) {
- if (!MatchInfo[I])
- MatchInfo[I] = GetUndef();
- }
- Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
- MI.eraseFromParent();
- return true;
-}
-
+/// Match the last G_INSERT_VECTOR_ELT of a chain of constant-index inserts
+/// that starts from a G_BUILD_VECTOR or a G_IMPLICIT_DEF.
+///
+/// \p MatchInfo is resized to the number of elements of the result vector.
+/// For each lane, it receives the register of the most recent insert into
+/// that lane (walking from \p MI towards the start of the chain). When the
+/// chain starts at a G_BUILD_VECTOR, lanes not written by any insert take the
+/// corresponding build-vector source; when it starts at a G_IMPLICIT_DEF such
+/// lanes are left invalid.
+///
+/// Returns false if \p MI feeds exactly one further G_INSERT_VECTOR_ELT (the
+/// combine is done from the end of the sequence), if an index is variable or
+/// out of range, or if the chain starts from anything else.
+bool CombinerHelper::matchCombineInsertVecElts(
+    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+         "Invalid opcode");
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+  unsigned NumElts = DstTy.getNumElements();
+  // If this MI is part of a sequence of insert_vec_elts, then
+  // don't do the combine in the middle of the sequence.
+  if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+                                   TargetOpcode::G_INSERT_VECTOR_ELT)
+    return false;
+  MachineInstr *CurrInst = &MI;
+  // Bound by the matcher below; initialized so it never holds garbage.
+  MachineInstr *TmpInst = nullptr;
+  int64_t IntImm;
+  Register TmpReg;
+  MatchInfo.resize(NumElts);
+  while (mi_match(
+      CurrInst->getOperand(0).getReg(), MRI,
+      m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+    // IntImm is signed: a negative constant index would pass a bare
+    // `IntImm >= NumElts` check and index MatchInfo out of bounds, so both
+    // ends of the range are checked explicitly.
+    if (IntImm < 0 || IntImm >= static_cast<int64_t>(NumElts))
+      return false;
+    // Later inserts are visited first; keep the first value seen per lane.
+    if (!MatchInfo[IntImm])
+      MatchInfo[IntImm] = TmpReg;
+    CurrInst = TmpInst;
+  }
+  // Variable index.
+  if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+    return false;
+  if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+    // Fill the lanes no insert touched from the build-vector sources
+    // (operand 0 is the build-vector result).
+    for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+      if (!MatchInfo[I - 1].isValid())
+        MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+    }
+    return true;
+  }
+  // If we didn't end in a G_IMPLICIT_DEF, bail out.
+  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+/// Replace \p MI by a G_BUILD_VECTOR of the registers in \p MatchInfo and
+/// erase \p MI. Invalid entries of \p MatchInfo are first filled with a
+/// single shared undef of the result's scalar type, created only when at
+/// least one entry needs it. Always returns true.
+bool CombinerHelper::applyCombineInsertVecElts(
+    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+  Builder.setInstr(MI);
+  Register SharedUndef;
+  for (Register &Elt : MatchInfo) {
+    if (Elt)
+      continue;
+    // Build the undef lazily, the first time a missing lane is found.
+    if (!SharedUndef) {
+      LLT VecTy = MRI.getType(MI.getOperand(0).getReg());
+      SharedUndef = Builder.buildUndef(VecTy.getScalarType()).getReg(0);
+    }
+    Elt = SharedUndef;
+  }
+  Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+  MI.eraseFromParent();
+  return true;
+}
+
bool CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
@@ -2762,812 +2762,812 @@ bool CombinerHelper::applySimplifyAddToSub(
return true;
}
-bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
- MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
- // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
- //
- // Records the steps to build the new hand + logic instructions (but does
- // not insert them.)
- //
- // On success, MatchInfo is populated with those build steps. They are
- // executed by the apply step (see applyBuildInstructionSteps).
- unsigned LogicOpcode = MI.getOpcode();
- assert(LogicOpcode == TargetOpcode::G_AND ||
- LogicOpcode == TargetOpcode::G_OR ||
- LogicOpcode == TargetOpcode::G_XOR);
- MachineIRBuilder MIB(MI); // NOTE(review): appears unused; the lambdas below declare their own MIB parameter.
- Register Dst = MI.getOperand(0).getReg();
- Register LHSReg = MI.getOperand(1).getReg();
- Register RHSReg = MI.getOperand(2).getReg();
-
- // Don't recompute anything.
- if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
- return false;
-
- // Make sure we have (hand x, ...), (hand y, ...)
- MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
- MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
- if (!LeftHandInst || !RightHandInst)
- return false;
- unsigned HandOpcode = LeftHandInst->getOpcode();
- if (HandOpcode != RightHandInst->getOpcode())
- return false;
- if (!LeftHandInst->getOperand(1).isReg() || // NOTE(review): operand 1 is read without checking the operand count.
- !RightHandInst->getOperand(1).isReg())
- return false;
-
- // Make sure the types match up, and if we're doing this post-legalization,
- // we end up with legal types.
- Register X = LeftHandInst->getOperand(1).getReg();
- Register Y = RightHandInst->getOperand(1).getReg();
- LLT XTy = MRI.getType(X);
- LLT YTy = MRI.getType(Y);
- if (XTy != YTy)
- return false;
- if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
- return false;
-
- // Optional extra source register.
- Register ExtraHandOpSrcReg;
- switch (HandOpcode) {
- default:
- return false;
- case TargetOpcode::G_ANYEXT:
- case TargetOpcode::G_SEXT:
- case TargetOpcode::G_ZEXT: {
- // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
- break;
- }
- case TargetOpcode::G_AND:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_SHL: {
- // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
- MachineOperand &ZOp = LeftHandInst->getOperand(2);
- if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
- return false;
- ExtraHandOpSrcReg = ZOp.getReg();
- break;
- }
- }
-
- // Record the steps to build the new instructions.
- //
- // Steps to build (logic x, y)
- auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
- OperandBuildSteps LogicBuildSteps = {
- [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
- InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
-
- // Steps to build hand (logic x, y), ...z
- OperandBuildSteps HandBuildSteps = {
- [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
- if (ExtraHandOpSrcReg.isValid())
- HandBuildSteps.push_back(
- [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
- InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
-
- MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
- return true;
-}
-
-bool CombinerHelper::applyBuildInstructionSteps( // Builds every instruction recorded in MatchInfo before MI, then erases MI.
- MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
- assert(MatchInfo.InstrsToBuild.size() &&
- "Expected at least one instr to build?");
- Builder.setInstr(MI);
- for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
- assert(InstrToBuild.Opcode && "Expected a valid opcode?");
- assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
- MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); // Start with the bare opcode...
- for (auto &OperandFn : InstrToBuild.OperandFns)
- OperandFn(Instr); // ...then run each recorded step on the new instruction, in order.
- }
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchAshrShlToSextInreg( // Matches ashr (shl Src, C), C; on success MatchInfo = (Src, C).
- MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_ASHR);
- int64_t ShlCst, AshrCst;
- Register Src;
- // FIXME: detect splat constant vectors.
- if (!mi_match(MI.getOperand(0).getReg(), MRI,
- m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
- return false;
- if (ShlCst != AshrCst) // Both shifts must use the same constant amount.
- return false;
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
- return false;
- MatchInfo = std::make_tuple(Src, ShlCst);
- return true;
-}
-bool CombinerHelper::applyAshShlToSextInreg( // Replaces MI with a G_SEXT_INREG of the matched source.
- MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_ASHR);
- Register Src;
- int64_t ShiftAmt;
- std::tie(Src, ShiftAmt) = MatchInfo;
- unsigned Size = MRI.getType(Src).getScalarSizeInBits();
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); // Extension width = scalar size minus the shift amount.
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
- Register &Replacement) {
- // Given
- //
- // %y:_(sN) = G_SOMETHING
- // %x:_(sN) = G_SOMETHING
- // %res:_(sN) = G_AND %x, %y
- //
- // Eliminate the G_AND when it is known that x & y == x or x & y == y.
- // On success, Replacement is set to the operand that equals the result.
- //
- // Patterns like this can appear as a result of legalization. E.g.
- //
- // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
- // %one:_(s32) = G_CONSTANT i32 1
- // %and:_(s32) = G_AND %cmp, %one
- //
- // In this case, G_ICMP only produces a single bit, so x & 1 == x.
- assert(MI.getOpcode() == TargetOpcode::G_AND);
- if (!KB) // The match relies on known-bits information.
- return false;
-
- Register AndDst = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(AndDst);
-
- // FIXME: This should be removed once GISelKnownBits supports vectors.
- if (DstTy.isVector())
- return false;
-
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- KnownBits LHSBits = KB->getKnownBits(LHS);
- KnownBits RHSBits = KB->getKnownBits(RHS);
-
- // Check that x & Mask == x, bit by bit:
- // x & 1 == x, always
- // x & 0 == x, only if x is also 0
- // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
- //
- // Check if we can replace AndDst with the LHS of the G_AND
- if (canReplaceReg(AndDst, LHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
- Replacement = LHS;
- return true;
- }
-
- // Check if we can replace AndDst with the RHS of the G_AND
- if (canReplaceReg(AndDst, RHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
- Replacement = RHS;
- return true;
- }
-
- return false;
-}
-
-bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
- // Given
- //
- // %y:_(sN) = G_SOMETHING
- // %x:_(sN) = G_SOMETHING
- // %res:_(sN) = G_OR %x, %y
- //
- // Eliminate the G_OR when it is known that x | y == x or x | y == y.
- // On success, Replacement is set to the operand that equals the result.
- assert(MI.getOpcode() == TargetOpcode::G_OR);
- if (!KB) // The match relies on known-bits information.
- return false;
-
- Register OrDst = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(OrDst);
-
- // FIXME: This should be removed once GISelKnownBits supports vectors.
- if (DstTy.isVector())
- return false;
-
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- KnownBits LHSBits = KB->getKnownBits(LHS);
- KnownBits RHSBits = KB->getKnownBits(RHS);
-
- // Check that x | Mask == x, bit by bit:
- // x | 0 == x, always
- // x | 1 == x, only if x is also 1
- // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
- //
- // Check if we can replace OrDst with the LHS of the G_OR
- if (canReplaceReg(OrDst, LHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
- Replacement = LHS;
- return true;
- }
-
- // Check if we can replace OrDst with the RHS of the G_OR
- if (canReplaceReg(OrDst, RHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
- Replacement = RHS;
- return true;
- }
-
- return false;
-}
-
-bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
- // If the input is already sign extended from ExtBits (or fewer) bits, the
- // extension is a no-op and can be dropped.
- Register Src = MI.getOperand(1).getReg();
- unsigned ExtBits = MI.getOperand(2).getImm();
- unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
- return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); // NOTE(review): KB is used without the null check that matchRedundantAnd/Or perform.
-}
-
-static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
- int64_t Cst, bool IsVector, bool IsFP) {
- // For i1, Cst will always be -1 regardless of boolean contents.
- return (ScalarSizeBits == 1 && Cst == -1) ||
- isConstTrueVal(TLI, Cst, IsVector, IsFP); // Wider scalars: defer to isConstTrueVal, which is given the target lowering.
-}
-
-bool CombinerHelper::matchNotCmp(MachineInstr &MI,
- SmallVectorImpl<Register> &RegsToNegate) {
- assert(MI.getOpcode() == TargetOpcode::G_XOR);
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
- Register XorSrc;
- Register CstReg;
- // We match xor(src, true) here.
- if (!mi_match(MI.getOperand(0).getReg(), MRI,
- m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
- return false;
-
- if (!MRI.hasOneNonDBGUse(XorSrc))
- return false;
-
- // Check that XorSrc is the root of a tree of comparisons combined with ANDs
- // and ORs. The suffix of RegsToNegate starting from index I is used as a
- // work list of tree nodes to visit.
- RegsToNegate.push_back(XorSrc);
- // Remember whether the comparisons are all integer or all floating point.
- bool IsInt = false;
- bool IsFP = false;
- for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
- Register Reg = RegsToNegate[I];
- if (!MRI.hasOneNonDBGUse(Reg))
- return false;
- MachineInstr *Def = MRI.getVRegDef(Reg);
- switch (Def->getOpcode()) {
- default:
- // Don't match if the tree contains anything other than ANDs, ORs and
- // comparisons.
- return false;
- case TargetOpcode::G_ICMP:
- if (IsFP)
- return false;
- IsInt = true;
- // When we apply the combine we will invert the predicate.
- break;
- case TargetOpcode::G_FCMP:
- if (IsInt)
- return false;
- IsFP = true;
- // When we apply the combine we will invert the predicate.
- break;
- case TargetOpcode::G_AND:
- case TargetOpcode::G_OR:
- // Implement De Morgan's laws:
- // ~(x & y) -> ~x | ~y
- // ~(x | y) -> ~x & ~y
- // When we apply the combine we will change the opcode and recursively
- // negate the operands.
- RegsToNegate.push_back(Def->getOperand(1).getReg());
- RegsToNegate.push_back(Def->getOperand(2).getReg());
- break;
- }
- }
-
- // Now we know whether the comparisons are integer or floating point, check
- // the constant in the xor.
- int64_t Cst;
- if (Ty.isVector()) {
- MachineInstr *CstDef = MRI.getVRegDef(CstReg);
- auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
- if (!MaybeCst)
- return false;
- if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
- return false;
- } else {
- if (!mi_match(CstReg, MRI, m_ICst(Cst)))
- return false;
- if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
- return false;
- }
-
- return true;
-}
-
-bool CombinerHelper::applyNotCmp(MachineInstr &MI,
- SmallVectorImpl<Register> &RegsToNegate) {
- for (Register Reg : RegsToNegate) {
- MachineInstr *Def = MRI.getVRegDef(Reg);
- Observer.changingInstr(*Def);
- // For each comparison, invert the predicate. For each AND and OR, swap the
- // opcode (AND <-> OR).
- switch (Def->getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case TargetOpcode::G_ICMP:
- case TargetOpcode::G_FCMP: {
- MachineOperand &PredOp = Def->getOperand(1);
- CmpInst::Predicate NewP = CmpInst::getInversePredicate(
- (CmpInst::Predicate)PredOp.getPredicate());
- PredOp.setPredicate(NewP);
- break;
- }
- case TargetOpcode::G_AND:
- Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
- break;
- case TargetOpcode::G_OR:
- Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
- break;
- }
- Observer.changedInstr(*Def);
- }
-
- replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); // The xor result is replaced by its (now negated) first source.
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::matchXorOfAndWithSameReg(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
- // Match (xor (and x, y), y) (or any of its commuted cases)
- // On success MatchInfo is (x, y), where y is the register shared with the xor.
- assert(MI.getOpcode() == TargetOpcode::G_XOR);
- Register &X = MatchInfo.first;
- Register &Y = MatchInfo.second;
- Register AndReg = MI.getOperand(1).getReg();
- Register SharedReg = MI.getOperand(2).getReg();
-
- // Find a G_AND on either side of the G_XOR.
- // Look for one of
- //
- // (xor (and x, y), SharedReg)
- // (xor SharedReg, (and x, y))
- if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
- std::swap(AndReg, SharedReg); // Retry with the xor operands commuted.
- if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
- return false;
- }
-
- // Only do this if we'll eliminate the G_AND.
- if (!MRI.hasOneNonDBGUse(AndReg))
- return false;
-
- // We can combine if SharedReg is the same as either the LHS or RHS of the
- // G_AND.
- if (Y != SharedReg)
- std::swap(X, Y);
- return Y == SharedReg;
-}
-
-bool CombinerHelper::applyXorOfAndWithSameReg(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
- // Fold (xor (and x, y), y) -> (and (not x), y)
- Builder.setInstrAndDebugLoc(MI);
- Register X, Y;
- std::tie(X, Y) = MatchInfo;
- auto Not = Builder.buildNot(MRI.getType(X), X);
- Observer.changingInstr(MI);
- MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); // MI is rewritten in place rather than replaced.
- MI.getOperand(1).setReg(Not->getOperand(0).getReg());
- MI.getOperand(2).setReg(Y);
- Observer.changedInstr(MI);
- return true;
-}
-
-bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { // True when operand 1 of MI is a constant zero (scalar or all-zeros build vector).
- Register DstReg = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(DstReg);
- const DataLayout &DL = Builder.getMF().getDataLayout();
-
- if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) // Never match in non-integral address spaces.
- return false;
-
- if (Ty.isPointer()) {
- auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
- return ConstVal && *ConstVal == 0;
- }
-
- assert(Ty.isVector() && "Expecting a vector type");
- const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
- return isBuildVectorAllZeros(*VecMI, MRI);
-}
-
-bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); // Replace the G_PTR_ADD with an int-to-pointer cast of operand 2.
- MI.eraseFromParent();
- return true;
-}
-
-/// Rewrites MI as an AND of its first source with (second source - 1). The second source operand is known to be a power of 2.
-bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Pow2Src1 = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(DstReg);
- Builder.setInstrAndDebugLoc(MI);
-
- // Fold (urem x, pow2) -> (and x, pow2-1)
- auto NegOne = Builder.buildConstant(Ty, -1);
- auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); // pow2 - 1, computed as pow2 + (-1).
- Builder.buildAnd(DstReg, Src0, Add);
- MI.eraseFromParent();
- return true;
-}
-
-Optional<SmallVector<Register, 8>>
-CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
- assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
- // We want to detect if Root is part of a tree which represents a bunch
- // of loads being merged into a larger load. We'll try to recognize patterns
- // like, for example:
- //
- // Reg Reg
- // \ /
- // OR_1 Reg
- // \ /
- // OR_2
- // \ Reg
- // .. /
- // Root
- //
- // Reg Reg Reg Reg
- // \ / \ /
- // OR_1 OR_2
- // \ /
- // \ /
- // ...
- // Root
- //
- // Each "Reg" may have been produced by a load + some arithmetic. This
- // function will save each of them.
- SmallVector<Register, 8> RegsToVisit;
- SmallVector<const MachineInstr *, 7> Ors = {Root};
-
- // In the "worst" case, we're dealing with a load for each byte. So, there
- // are at most #bytes - 1 ORs.
- const unsigned MaxIter =
- MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
- for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
- if (Ors.empty())
- break;
- const MachineInstr *Curr = Ors.pop_back_val();
- Register OrLHS = Curr->getOperand(1).getReg();
- Register OrRHS = Curr->getOperand(2).getReg();
-
- // In the combine, we want to eliminate the entire tree.
- if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
- return None;
-
- // If it's a G_OR, save it and continue to walk. If it's not, then it's
- // something that may be a load + arithmetic.
- if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
- Ors.push_back(Or);
- else
- RegsToVisit.push_back(OrLHS);
- if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
- Ors.push_back(Or);
- else
- RegsToVisit.push_back(OrRHS);
- }
-
- // We're going to try and merge each register into a wider power-of-2 type,
- // so we ought to have an even number of registers.
- if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
- return None;
- return RegsToVisit;
-}
-
-/// Helper function for findLoadOffsetsForLoadOrCombine.
-///
-/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
-/// and then moving that value into a specific byte offset.
-///
-/// e.g. x[i] << 24
-///
-/// \returns The load instruction and the position it is moved into, expressed
-/// in units of \p MemSizeInBits (the shift amount divided by \p MemSizeInBits).
-static Optional<std::pair<MachineInstr *, int64_t>>
-matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
- const MachineRegisterInfo &MRI) {
- assert(MRI.hasOneNonDBGUse(Reg) &&
- "Expected Reg to only have one non-debug use?");
- Register MaybeLoad;
- int64_t Shift;
- if (!mi_match(Reg, MRI,
- m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
- Shift = 0; // No constant shift: treat Reg itself as the value at position 0.
- MaybeLoad = Reg;
- }
-
- if (Shift % MemSizeInBits != 0)
- return None;
-
- // TODO: Handle other types of loads.
- auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
- if (!Load)
- return None;
-
- const auto &MMO = **Load->memoperands_begin();
- if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
- return None;
-
- return std::make_pair(Load, Shift / MemSizeInBits);
-}
-
-Optional<std::pair<MachineInstr *, int64_t>>
-CombinerHelper::findLoadOffsetsForLoadOrCombine(
- SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
- const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
-
- // Each load found for the pattern. There should be one for each RegsToVisit.
- SmallSetVector<const MachineInstr *, 8> Loads;
-
- // The lowest index used in any load. (The lowest "i" for each x[i].)
- int64_t LowestIdx = INT64_MAX;
-
- // The load which uses the lowest index.
- MachineInstr *LowestIdxLoad = nullptr;
-
- // Keeps track of the load indices we see. We shouldn't see any indices twice.
- SmallSet<int64_t, 8> SeenIdx;
-
- // Ensure each load is in the same MBB.
- // TODO: Support multiple MachineBasicBlocks.
- MachineBasicBlock *MBB = nullptr;
- const MachineMemOperand *MMO = nullptr;
-
- // Earliest instruction-order load in the pattern.
- MachineInstr *EarliestLoad = nullptr;
-
- // Latest instruction-order load in the pattern.
- MachineInstr *LatestLoad = nullptr;
-
- // Base pointer which every load should share.
- Register BasePtr;
-
- // We want to find a load for each register. Each load should have some
- // appropriate bit twiddling arithmetic. During this loop, we will also keep
- // track of the load which uses the lowest index. Later, we will check if we
- // can use its pointer in the final, combined load.
- for (auto Reg : RegsToVisit) {
- // Find the load, and find the position that it will end up in (e.g. a
- // shifted value).
- auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
- if (!LoadAndPos)
- return None;
- MachineInstr *Load;
- int64_t DstPos;
- std::tie(Load, DstPos) = *LoadAndPos;
-
- // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
- // it is difficult to check for stores/calls/etc between loads.
- MachineBasicBlock *LoadMBB = Load->getParent();
- if (!MBB)
- MBB = LoadMBB;
- if (LoadMBB != MBB)
- return None;
-
- // Make sure that the MachineMemOperands of every seen load are compatible.
- const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
- if (!MMO)
- MMO = LoadMMO;
- if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
- return None;
-
- // Find out what the base pointer and index for the load is.
- Register LoadPtr;
- int64_t Idx;
- if (!mi_match(Load->getOperand(1).getReg(), MRI,
- m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
- LoadPtr = Load->getOperand(1).getReg();
- Idx = 0;
- }
-
- // Don't combine things like a[i], a[i] -> a bigger load.
- if (!SeenIdx.insert(Idx).second)
- return None;
-
- // Every load must share the same base pointer; don't combine things like:
- //
- // a[i], b[i + 1] -> a bigger load.
- if (!BasePtr.isValid())
- BasePtr = LoadPtr;
- if (BasePtr != LoadPtr)
- return None;
-
- if (Idx < LowestIdx) {
- LowestIdx = Idx;
- LowestIdxLoad = Load;
- }
-
- // Keep track of the byte offset that this load ends up at. If we have seen
- // the byte offset, then stop here. We do not want to combine:
- //
- // a[i] << 16, a[i + k] << 16 -> a bigger load.
- if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
- return None;
- Loads.insert(Load);
-
- // Keep track of the position of the earliest/latest loads in the pattern.
- // We will check that there are no load fold barriers between them later
- // on.
- //
- // FIXME: Is there a better way to check for load fold barriers?
- if (!EarliestLoad || dominates(*Load, *EarliestLoad))
- EarliestLoad = Load;
- if (!LatestLoad || dominates(*LatestLoad, *Load))
- LatestLoad = Load;
- }
-
- // We found a load for each register. Let's check if each load satisfies the
- // pattern.
- assert(Loads.size() == RegsToVisit.size() &&
- "Expected to find a load for each register?");
- assert(EarliestLoad != LatestLoad && EarliestLoad &&
- LatestLoad && "Expected at least two loads?");
-
- // Check if there are any stores, calls, etc. between any of the loads. If
- // there are, then we can't safely perform the combine.
- //
- // MaxIter is chosen based off the (worst case) number of iterations it
- // typically takes to succeed in the LLVM test suite plus some padding.
- //
- // FIXME: Is there a better way to check for load fold barriers?
- const unsigned MaxIter = 20;
- unsigned Iter = 0;
- for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
- LatestLoad->getIterator())) {
- if (Loads.count(&MI)) // Loads that belong to the pattern are skipped and do not count towards MaxIter.
- continue;
- if (MI.isLoadFoldBarrier())
- return None;
- if (Iter++ == MaxIter)
- return None;
- }
-
- return std::make_pair(LowestIdxLoad, LowestIdx);
-}
-
-bool CombinerHelper::matchLoadOrCombine(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_OR);
- MachineFunction &MF = *MI.getMF();
- // Assuming a little-endian target, transform:
- // s8 *a = ...
- // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
- // =>
- // s32 val = *((s32)a)
- //
- // s8 *a = ...
- // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
- // =>
- // s32 val = BSWAP(*((s32)a))
- Register Dst = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Dst);
- if (Ty.isVector())
- return false;
-
- // We need to combine at least two loads into this type. Since the smallest
- // possible load is into a byte, we need at least a 16-bit wide type.
- const unsigned WideMemSizeInBits = Ty.getSizeInBits();
- if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
- return false;
-
- // Match a collection of non-OR instructions in the pattern.
- auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
- if (!RegsToVisit)
- return false;
-
- // We have a collection of non-OR instructions. Figure out how wide each of
- // the small loads should be based off of the number of potential loads we
- // found.
- const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
- if (NarrowMemSizeInBits % 8 != 0)
- return false;
-
- // Check if each register feeding into each OR is a load from the same
- // base pointer + some arithmetic.
- //
- // e.g. a[0], a[1] << 8, a[2] << 16, etc.
- //
- // Also verify that each of these ends up putting a[i] into the same memory
- // offset as a load into a wide type would.
- SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
- MachineInstr *LowestIdxLoad;
- int64_t LowestIdx;
- auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
- MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
- if (!MaybeLoadInfo)
- return false;
- std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
-
- // We have a bunch of loads being OR'd together. Using the addresses + offsets
- // we found before, check if this corresponds to a big or little endian byte
- // pattern. If it does, then we can represent it using a load + possibly a
- // BSWAP.
- bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
- Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
- if (!IsBigEndian.hasValue())
- return false;
- bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
- if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
- return false;
-
- // Make sure that the load from the lowest index produces offset 0 in the
- // final value.
- //
- // This ensures that we won't combine something like this:
- //
- // load x[i] -> byte 2
- // load x[i+1] -> byte 0 ---> wide_load x[i]
- // load x[i+2] -> byte 1
- const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
- const unsigned ZeroByteOffset =
- *IsBigEndian
- ? bigEndianByteAt(NumLoadsInTy, 0)
- : littleEndianByteAt(NumLoadsInTy, 0);
- auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
- if (ZeroOffsetIdx == MemOffset2Idx.end() ||
- ZeroOffsetIdx->second != LowestIdx)
- return false;
-
- // We will reuse the pointer from the load which ends up at byte offset 0. It
- // may not use index 0.
- Register Ptr = LowestIdxLoad->getOperand(1).getReg();
- const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
- LegalityQuery::MemDesc MMDesc;
- MMDesc.SizeInBits = WideMemSizeInBits;
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getOrdering();
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
- return false;
- auto PtrInfo = MMO.getPointerInfo();
- auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
-
- // Load must be allowed and fast on the target.
- LLVMContext &C = MF.getFunction().getContext();
- auto &DL = MF.getDataLayout();
- bool Fast = false;
- if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
- !Fast)
- return false;
-
- // Defer the actual rewrite: MatchInfo builds the wide load (plus a BSWAP
- // when the byte order has to be reversed) when it is invoked.
- MatchInfo = [=](MachineIRBuilder &MIB) {
- Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
- MIB.buildLoad(LoadDst, Ptr, *NewMMO);
- if (NeedsBSwap)
- MIB.buildBSwap(Dst, LoadDst);
- };
- return true;
-}
-
-bool CombinerHelper::applyLoadOrCombine(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- Builder.setInstrAndDebugLoc(MI);
- MatchInfo(Builder); // Run the build callback stored in MatchInfo, inserting before MI.
- MI.eraseFromParent();
- return true;
-}
-
+/// Match: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
+///
+/// "logic" is G_AND, G_OR or G_XOR. "hand" is the common opcode of the
+/// instructions defining both operands: an extension (G_ANYEXT, G_SEXT,
+/// G_ZEXT) or a binop (G_AND, G_ASHR, G_LSHR, G_SHL) whose second source is
+/// the same on both sides.
+///
+/// Nothing is inserted here. On success, \p MatchInfo holds the steps needed
+/// to build the new logic instruction followed by the new hand instruction;
+/// applyBuildInstructionSteps executes steps of this form.
+///
+/// \param MI the logic instruction.
+/// \param [out] MatchInfo the recorded build steps (only written on success).
+/// \returns true if the pattern matched.
+bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
+    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+  unsigned LogicOpcode = MI.getOpcode();
+  assert(LogicOpcode == TargetOpcode::G_AND ||
+         LogicOpcode == TargetOpcode::G_OR ||
+         LogicOpcode == TargetOpcode::G_XOR);
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHSReg = MI.getOperand(1).getReg();
+  Register RHSReg = MI.getOperand(2).getReg();
+
+  // Don't recompute anything.
+  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
+    return false;
+
+  // Make sure we have (hand x, ...), (hand y, ...)
+  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
+  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
+  if (!LeftHandInst || !RightHandInst)
+    return false;
+  unsigned HandOpcode = LeftHandInst->getOpcode();
+  if (HandOpcode != RightHandInst->getOpcode())
+    return false;
+  // The hands must have a first source operand at all (an instruction with
+  // only a def, such as G_IMPLICIT_DEF, does not), and it must be a register.
+  if (LeftHandInst->getNumOperands() < 2 ||
+      RightHandInst->getNumOperands() < 2)
+    return false;
+  if (!LeftHandInst->getOperand(1).isReg() ||
+      !RightHandInst->getOperand(1).isReg())
+    return false;
+
+  // Make sure the types match up, and if we're doing this post-legalization,
+  // we end up with legal types.
+  Register X = LeftHandInst->getOperand(1).getReg();
+  Register Y = RightHandInst->getOperand(1).getReg();
+  LLT XTy = MRI.getType(X);
+  LLT YTy = MRI.getType(Y);
+  if (XTy != YTy)
+    return false;
+  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+    return false;
+
+  // Optional extra source register.
+  Register ExtraHandOpSrcReg;
+  switch (HandOpcode) {
+  default:
+    return false;
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT: {
+    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+    break;
+  }
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_SHL: {
+    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
+    MachineOperand &ZOp = LeftHandInst->getOperand(2);
+    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
+      return false;
+    ExtraHandOpSrcReg = ZOp.getReg();
+    break;
+  }
+  }
+
+  // Record the steps to build the new instructions.
+  //
+  // Steps to build (logic x, y)
+  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
+  OperandBuildSteps LogicBuildSteps = {
+      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
+  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
+
+  // Steps to build hand (logic x, y), ...z
+  OperandBuildSteps HandBuildSteps = {
+      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
+  if (ExtraHandOpSrcReg.isValid())
+    HandBuildSteps.push_back(
+        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
+  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
+
+  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
+  return true;
+}
+
+/// Build every instruction described by \p MatchInfo in front of \p MI, then
+/// erase \p MI. Each description is an opcode plus an ordered list of
+/// callbacks; each callback is invoked on the freshly created instruction.
+/// Always returns true.
+bool CombinerHelper::applyBuildInstructionSteps(
+    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+  assert(MatchInfo.InstrsToBuild.size() &&
+         "Expected at least one instr to build?");
+  Builder.setInstr(MI);
+  for (auto &Recipe : MatchInfo.InstrsToBuild) {
+    assert(Recipe.Opcode && "Expected a valid opcode?");
+    assert(Recipe.OperandFns.size() && "Expected at least one operand?");
+    // Create the instruction with no operands, then apply the recorded
+    // callbacks in order.
+    MachineInstrBuilder NewInstr = Builder.buildInstr(Recipe.Opcode);
+    for (auto &AddOperand : Recipe.OperandFns)
+      AddOperand(NewInstr);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Match ashr (shl Src, C), C with one constant C used for both shifts.
+///
+/// \param MI the G_ASHR.
+/// \param [out] MatchInfo set to (Src, C) on success.
+/// \returns true if the pattern matched and G_SEXT_INREG on Src's type is
+/// legal (or we are running before the legalizer).
+bool CombinerHelper::matchAshrShlToSextInreg(
+    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+  Register Src;
+  int64_t ShlCst, AshrCst;
+  // FIXME: detect splat constant vectors.
+  const bool IsAshrOfShl =
+      mi_match(MI.getOperand(0).getReg(), MRI,
+               m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)));
+  // The shift amounts are only bound (and only compared) when the pattern
+  // matched.
+  if (!IsAshrOfShl || ShlCst != AshrCst)
+    return false;
+  const bool CanUseSextInreg = isLegalOrBeforeLegalizer(
+      {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}});
+  if (!CanUseSextInreg)
+    return false;
+  MatchInfo = std::make_tuple(Src, ShlCst);
+  return true;
+}
+/// Replace the G_ASHR \p MI with a G_SEXT_INREG of the source register stored
+/// in \p MatchInfo. The extension width is the source's scalar size minus the
+/// stored shift amount. Always returns true.
+bool CombinerHelper::applyAshShlToSextInreg(
+    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+  Register Src = std::get<0>(MatchInfo);
+  int64_t ShiftAmt = std::get<1>(MatchInfo);
+  unsigned ScalarBits = MRI.getType(Src).getScalarSizeInBits();
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src,
+                         ScalarBits - ShiftAmt);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Match a G_AND whose result is known to equal one of its operands.
+///
+/// Given
+///
+///   %y:_(sN) = G_SOMETHING
+///   %x:_(sN) = G_SOMETHING
+///   %res:_(sN) = G_AND %x, %y
+///
+/// the G_AND can be eliminated when x & y == x or x & y == y. Patterns like
+/// this can appear as a result of legalization, e.g.
+///
+///   %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+///   %one:_(s32) = G_CONSTANT i32 1
+///   %and:_(s32) = G_AND %cmp, %one
+///
+/// where G_ICMP only produces a single bit, so x & 1 == x.
+///
+/// \param MI the G_AND.
+/// \param [out] Replacement the operand that can replace the result.
+/// \returns true if such an operand was found. Returns false when no
+/// known-bits analysis is available or the result is a vector.
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+                                       Register &Replacement) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  if (!KB)
+    return false;
+
+  Register AndDst = MI.getOperand(0).getReg();
+
+  // FIXME: This should be removed once GISelKnownBits supports vectors.
+  if (MRI.getType(AndDst).isVector())
+    return false;
+
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  KnownBits LHSBits = KB->getKnownBits(LHS);
+  KnownBits RHSBits = KB->getKnownBits(RHS);
+
+  // Keep & Mask == Keep holds when, for every bit, the mask bit is known one
+  // or the kept bit is known zero:
+  //   x & 1 == x, always
+  //   x & 0 == x, only if x is also 0
+  // The kept operand must also be allowed to replace the destination.
+  auto MaskIsNoOpFor = [&](Register Keep, const KnownBits &KeepBits,
+                           const KnownBits &MaskBits) {
+    return canReplaceReg(AndDst, Keep, MRI) &&
+           (KeepBits.Zero | MaskBits.One).isAllOnesValue();
+  };
+
+  // Prefer the LHS, then fall back to the RHS.
+  if (MaskIsNoOpFor(LHS, LHSBits, RHSBits)) {
+    Replacement = LHS;
+    return true;
+  }
+  if (MaskIsNoOpFor(RHS, RHSBits, LHSBits)) {
+    Replacement = RHS;
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_OR %x, %y
+ //
+ // Eliminate the G_OR when it is known that x | y == x or x | y == y.
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ if (!KB)
+ return false;
+
+ Register OrDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(OrDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x | Mask == x.
+ // x | 0 == x, always
+ // x | 1 == x, only if x is also 1
+ // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
+ //
+ // Check if we can replace OrDst with the LHS of the G_OR
+ if (canReplaceReg(OrDst, LHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace OrDst with the RHS of the G_OR
+ if (canReplaceReg(OrDst, RHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // If the input is already sign extended, just drop the extension.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+ int64_t Cst, bool IsVector, bool IsFP) {
+ // For i1, Cst will always be -1 regardless of boolean contents.
+ return (ScalarSizeBits == 1 && Cst == -1) ||
+ isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+ Register XorSrc;
+ Register CstReg;
+ // We match xor(src, true) here.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+ if (!MRI.hasOneNonDBGUse(XorSrc))
+ return false;
+
+ // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+ // and ORs. The suffix of RegsToNegate starting from index I is used as a work
+ // list of tree nodes to visit.
+ RegsToNegate.push_back(XorSrc);
+ // Remember whether the comparisons are all integer or all floating point.
+ bool IsInt = false;
+ bool IsFP = false;
+ for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+ Register Reg = RegsToNegate[I];
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ switch (Def->getOpcode()) {
+ default:
+ // Don't match if the tree contains anything other than ANDs, ORs and
+ // comparisons.
+ return false;
+ case TargetOpcode::G_ICMP:
+ if (IsFP)
+ return false;
+ IsInt = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_FCMP:
+ if (IsInt)
+ return false;
+ IsFP = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ // Implement De Morgan's laws:
+ // ~(x & y) -> ~x | ~y
+ // ~(x | y) -> ~x & ~y
+ // When we apply the combine we will change the opcode and recursively
+ // negate the operands.
+ RegsToNegate.push_back(Def->getOperand(1).getReg());
+ RegsToNegate.push_back(Def->getOperand(2).getReg());
+ break;
+ }
+ }
+
+ // Now we know whether the comparisons are integer or floating point, check
+ // the constant in the xor.
+ int64_t Cst;
+ if (Ty.isVector()) {
+ MachineInstr *CstDef = MRI.getVRegDef(CstReg);
+ auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ if (!MaybeCst)
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
+ return false;
+ } else {
+ if (!mi_match(CstReg, MRI, m_ICst(Cst)))
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
+ return false;
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ for (Register Reg : RegsToNegate) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ Observer.changingInstr(*Def);
+ // For each comparison, invert the predicate. For each AND and OR, change the
+ // opcode.
+ switch (Def->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ MachineOperand &PredOp = Def->getOperand(1);
+ CmpInst::Predicate NewP = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)PredOp.getPredicate());
+ PredOp.setPredicate(NewP);
+ break;
+ }
+ case TargetOpcode::G_AND:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
+ break;
+ case TargetOpcode::G_OR:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ break;
+ }
+ Observer.changedInstr(*Def);
+ }
+
+ replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Match (xor (and x, y), y) (or any of its commuted cases)
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ Register &X = MatchInfo.first;
+ Register &Y = MatchInfo.second;
+ Register AndReg = MI.getOperand(1).getReg();
+ Register SharedReg = MI.getOperand(2).getReg();
+
+ // Find a G_AND on either side of the G_XOR.
+ // Look for one of
+ //
+ // (xor (and x, y), SharedReg)
+ // (xor SharedReg, (and x, y))
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+ std::swap(AndReg, SharedReg);
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+ return false;
+ }
+
+ // Only do this if we'll eliminate the G_AND.
+ if (!MRI.hasOneNonDBGUse(AndReg))
+ return false;
+
+ // We can combine if SharedReg is the same as either the LHS or RHS of the
+ // G_AND.
+ if (Y != SharedReg)
+ std::swap(X, Y);
+ return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Fold (xor (and x, y), y) -> (and (not x), y)
+ Builder.setInstrAndDebugLoc(MI);
+ Register X, Y;
+ std::tie(X, Y) = MatchInfo;
+ auto Not = Builder.buildNot(MRI.getType(X), X);
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ const DataLayout &DL = Builder.getMF().getDataLayout();
+
+ if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
+ return false;
+
+ if (Ty.isPointer()) {
+ auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ return ConstVal && *ConstVal == 0;
+ }
+
+ assert(Ty.isVector() && "Expecting a vector type");
+ const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ return isBuildVectorAllZeros(*VecMI, MRI);
+}
+
+bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+}
+
+/// The second source operand is known to be a power of 2.
+bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Pow2Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ Builder.setInstrAndDebugLoc(MI);
+
+ // Fold (urem x, pow2) -> (and x, pow2-1)
+ auto NegOne = Builder.buildConstant(Ty, -1);
+ auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
+ Builder.buildAnd(DstReg, Src0, Add);
+ MI.eraseFromParent();
+ return true;
+}
+
+Optional<SmallVector<Register, 8>>
+CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
+ assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
+ // We want to detect if Root is part of a tree which represents a bunch
+ // of loads being merged into a larger load. We'll try to recognize patterns
+ // like, for example:
+ //
+ // Reg Reg
+ // \ /
+ // OR_1 Reg
+ // \ /
+ // OR_2
+ // \ Reg
+ // .. /
+ // Root
+ //
+ // Reg Reg Reg Reg
+ // \ / \ /
+ // OR_1 OR_2
+ // \ /
+ // \ /
+ // ...
+ // Root
+ //
+ // Each "Reg" may have been produced by a load + some arithmetic. This
+ // function will save each of them.
+ SmallVector<Register, 8> RegsToVisit;
+ SmallVector<const MachineInstr *, 7> Ors = {Root};
+
+ // In the "worst" case, we're dealing with a load for each byte. So, there
+ // are at most #bytes - 1 ORs.
+ const unsigned MaxIter =
+ MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
+ for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
+ if (Ors.empty())
+ break;
+ const MachineInstr *Curr = Ors.pop_back_val();
+ Register OrLHS = Curr->getOperand(1).getReg();
+ Register OrRHS = Curr->getOperand(2).getReg();
+
+ // In the combine, we want to eliminate the entire tree.
+ if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
+ return None;
+
+ // If it's a G_OR, save it and continue to walk. If it's not, then it's
+ // something that may be a load + arithmetic.
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrLHS);
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrRHS);
+ }
+
+ // We're going to try and merge each register into a wider power-of-2 type,
+ // so we ought to have an even number of registers.
+ if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
+ return None;
+ return RegsToVisit;
+}
+
+/// Helper function for findLoadOffsetsForLoadOrCombine.
+///
+/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
+/// and then moving that value into a specific byte offset.
+///
+/// e.g. x[i] << 24
+///
+/// \returns The load instruction and the byte offset it is moved into.
+static Optional<std::pair<MachineInstr *, int64_t>>
+matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
+ const MachineRegisterInfo &MRI) {
+ assert(MRI.hasOneNonDBGUse(Reg) &&
+ "Expected Reg to only have one non-debug use?");
+ Register MaybeLoad;
+ int64_t Shift;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
+ Shift = 0;
+ MaybeLoad = Reg;
+ }
+
+ if (Shift % MemSizeInBits != 0)
+ return None;
+
+ // TODO: Handle other types of loads.
+ auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
+ if (!Load)
+ return None;
+
+ const auto &MMO = **Load->memoperands_begin();
+ if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
+ return None;
+
+ return std::make_pair(Load, Shift / MemSizeInBits);
+}
+
+Optional<std::pair<MachineInstr *, int64_t>>
+CombinerHelper::findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
+
+ // Each load found for the pattern. There should be one for each RegsToVisit.
+ SmallSetVector<const MachineInstr *, 8> Loads;
+
+ // The lowest index used in any load. (The lowest "i" for each x[i].)
+ int64_t LowestIdx = INT64_MAX;
+
+ // The load which uses the lowest index.
+ MachineInstr *LowestIdxLoad = nullptr;
+
+ // Keeps track of the load indices we see. We shouldn't see any indices twice.
+ SmallSet<int64_t, 8> SeenIdx;
+
+ // Ensure each load is in the same MBB.
+ // TODO: Support multiple MachineBasicBlocks.
+ MachineBasicBlock *MBB = nullptr;
+ const MachineMemOperand *MMO = nullptr;
+
+ // Earliest instruction-order load in the pattern.
+ MachineInstr *EarliestLoad = nullptr;
+
+ // Latest instruction-order load in the pattern.
+ MachineInstr *LatestLoad = nullptr;
+
+ // Base pointer which every load should share.
+ Register BasePtr;
+
+ // We want to find a load for each register. Each load should have some
+ // appropriate bit twiddling arithmetic. During this loop, we will also keep
+ // track of the load which uses the lowest index. Later, we will check if we
+ // can use its pointer in the final, combined load.
+ for (auto Reg : RegsToVisit) {
+ // Find the load, and find the position that it will end up in (e.g. a
+ // shifted value).
+ auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
+ if (!LoadAndPos)
+ return None;
+ MachineInstr *Load;
+ int64_t DstPos;
+ std::tie(Load, DstPos) = *LoadAndPos;
+
+ // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
+ // it is difficult to check for stores/calls/etc between loads.
+ MachineBasicBlock *LoadMBB = Load->getParent();
+ if (!MBB)
+ MBB = LoadMBB;
+ if (LoadMBB != MBB)
+ return None;
+
+ // Make sure that the MachineMemOperands of every seen load are compatible.
+ const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
+ if (!MMO)
+ MMO = LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
+ return None;
+
+ // Find out what the base pointer and index for the load is.
+ Register LoadPtr;
+ int64_t Idx;
+ if (!mi_match(Load->getOperand(1).getReg(), MRI,
+ m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
+ LoadPtr = Load->getOperand(1).getReg();
+ Idx = 0;
+ }
+
+ // Don't combine things like a[i], a[i] -> a bigger load.
+ if (!SeenIdx.insert(Idx).second)
+ return None;
+
+ // Every load must share the same base pointer; don't combine things like:
+ //
+ // a[i], b[i + 1] -> a bigger load.
+ if (!BasePtr.isValid())
+ BasePtr = LoadPtr;
+ if (BasePtr != LoadPtr)
+ return None;
+
+ if (Idx < LowestIdx) {
+ LowestIdx = Idx;
+ LowestIdxLoad = Load;
+ }
+
+ // Keep track of the byte offset that this load ends up at. If we have seen
+ // the byte offset, then stop here. We do not want to combine:
+ //
+ // a[i] << 16, a[i + k] << 16 -> a bigger load.
+ if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
+ return None;
+ Loads.insert(Load);
+
+ // Keep track of the position of the earliest/latest loads in the pattern.
+ // We will check that there are no load fold barriers between them later
+ // on.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ if (!EarliestLoad || dominates(*Load, *EarliestLoad))
+ EarliestLoad = Load;
+ if (!LatestLoad || dominates(*LatestLoad, *Load))
+ LatestLoad = Load;
+ }
+
+ // We found a load for each register. Let's check if each load satisfies the
+ // pattern.
+ assert(Loads.size() == RegsToVisit.size() &&
+ "Expected to find a load for each register?");
+ assert(EarliestLoad != LatestLoad && EarliestLoad &&
+ LatestLoad && "Expected at least two loads?");
+
+ // Check if there are any stores, calls, etc. between any of the loads. If
+ // there are, then we can't safely perform the combine.
+ //
+ // MaxIter is chosen based off the (worst case) number of iterations it
+ // typically takes to succeed in the LLVM test suite plus some padding.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
+ LatestLoad->getIterator())) {
+ if (Loads.count(&MI))
+ continue;
+ if (MI.isLoadFoldBarrier())
+ return None;
+ if (Iter++ == MaxIter)
+ return None;
+ }
+
+ return std::make_pair(LowestIdxLoad, LowestIdx);
+}
+
+bool CombinerHelper::matchLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ MachineFunction &MF = *MI.getMF();
+ // Assuming a little-endian target, transform:
+ // s8 *a = ...
+ // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+ // =>
+ // s32 val = *((s32)a)
+ //
+ // s8 *a = ...
+ // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+ // =>
+ // s32 val = BSWAP(*((s32)a))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty.isVector())
+ return false;
+
+ // We need to combine at least two loads into this type. Since the smallest
+ // possible load is into a byte, we need at least a 16-bit wide type.
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits();
+ if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
+ return false;
+
+ // Match a collection of non-OR instructions in the pattern.
+ auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
+ if (!RegsToVisit)
+ return false;
+
+ // We have a collection of non-OR instructions. Figure out how wide each of
+ // the small loads should be based off of the number of potential loads we
+ // found.
+ const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
+ if (NarrowMemSizeInBits % 8 != 0)
+ return false;
+
+ // Check if each register feeding into each OR is a load from the same
+ // base pointer + some arithmetic.
+ //
+ // e.g. a[0], a[1] << 8, a[2] << 16, etc.
+ //
+ // Also verify that each of these ends up putting a[i] into the same memory
+ // offset as a load into a wide type would.
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
+ MachineInstr *LowestIdxLoad;
+ int64_t LowestIdx;
+ auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
+ MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
+ if (!MaybeLoadInfo)
+ return false;
+ std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+
+ // We have a bunch of loads being OR'd together. Using the addresses + offsets
+ // we found before, check if this corresponds to a big or little endian byte
+ // pattern. If it does, then we can represent it using a load + possibly a
+ // BSWAP.
+ bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
+ Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ if (!IsBigEndian.hasValue())
+ return false;
+ bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
+ if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
+ return false;
+
+ // Make sure that the load from the lowest index produces offset 0 in the
+ // final value.
+ //
+ // This ensures that we won't combine something like this:
+ //
+ // load x[i] -> byte 2
+ // load x[i+1] -> byte 0 ---> wide_load x[i]
+ // load x[i+2] -> byte 1
+ const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
+ const unsigned ZeroByteOffset =
+ *IsBigEndian
+ ? bigEndianByteAt(NumLoadsInTy, 0)
+ : littleEndianByteAt(NumLoadsInTy, 0);
+ auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
+ if (ZeroOffsetIdx == MemOffset2Idx.end() ||
+ ZeroOffsetIdx->second != LowestIdx)
+ return false;
+
+ // We will reuse the pointer from the load which ends up at byte offset 0. It
+ // may not use index 0.
+ Register Ptr = LowestIdxLoad->getOperand(1).getReg();
+ const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
+ LegalityQuery::MemDesc MMDesc;
+ MMDesc.SizeInBits = WideMemSizeInBits;
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
+ return false;
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ bool Fast = false;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
+ !Fast)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
+ MIB.buildLoad(LoadDst, Ptr, *NewMMO);
+ if (NeedsBSwap)
+ MIB.buildBSwap(Dst, LoadDst);
+ };
+ return true;
+}
+
+bool CombinerHelper::applyLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index 59f4d60a41..6bc72e4aa9 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
void GISelChangeObserver::changingAllUsesOfReg(
- const MachineRegisterInfo &MRI, Register Reg) {
+ const MachineRegisterInfo &MRI, Register Reg) {
for (auto &ChangingMI : MRI.use_instructions(Reg)) {
changingInstr(ChangingMI);
ChangingAllUsesOfReg.insert(&ChangingMI);
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 2de20489e1..e38ede1b67 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -94,25 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
<< "\n";
}
-/// Compute known bits for the intersection of \p Src0 and \p Src1
-void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
- KnownBits &Known,
- const APInt &DemandedElts,
- unsigned Depth) {
- // Test src1 first, since we canonicalize simpler expressions to the RHS.
- computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
-
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- return;
-
- KnownBits Known2;
- computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
-
- // Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
-}
-
+/// Compute known bits for the intersection of \p Src0 and \p Src1
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ return;
+
+ KnownBits Known2;
+ computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+}
+
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -200,7 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known = KnownBits::commonBits(Known, Known2);
+ Known = KnownBits::commonBits(Known, Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
if (Known.One == 0 && Known.Zero == 0)
@@ -217,7 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
auto CstVal = getConstantVRegVal(R, MRI);
if (!CstVal)
break;
- Known = KnownBits::makeConstant(*CstVal);
+ Known = KnownBits::makeConstant(*CstVal);
break;
}
case TargetOpcode::G_FRAME_INDEX: {
@@ -284,52 +284,52 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- Known = KnownBits::computeForMul(Known, Known2);
+ Known = KnownBits::computeForMul(Known, Known2);
break;
}
case TargetOpcode::G_SELECT: {
- computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
- Known, DemandedElts, Depth + 1);
- break;
- }
- case TargetOpcode::G_SMIN: {
- // TODO: Handle clamp pattern with number of sign bits
- KnownBits KnownRHS;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ Known, DemandedElts, Depth + 1);
+ break;
+ }
+ case TargetOpcode::G_SMIN: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
Depth + 1);
- Known = KnownBits::smin(Known, KnownRHS);
- break;
- }
- case TargetOpcode::G_SMAX: {
- // TODO: Handle clamp pattern with number of sign bits
- KnownBits KnownRHS;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
- Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
- Depth + 1);
- Known = KnownBits::smax(Known, KnownRHS);
- break;
- }
- case TargetOpcode::G_UMIN: {
- KnownBits KnownRHS;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
- DemandedElts, Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
- DemandedElts, Depth + 1);
- Known = KnownBits::umin(Known, KnownRHS);
- break;
- }
- case TargetOpcode::G_UMAX: {
- KnownBits KnownRHS;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
- DemandedElts, Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
- DemandedElts, Depth + 1);
- Known = KnownBits::umax(Known, KnownRHS);
+ Known = KnownBits::smin(Known, KnownRHS);
break;
}
+ case TargetOpcode::G_SMAX: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::smax(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMIN: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMAX: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known, KnownRHS);
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
if (TL.getBooleanContents(DstTy.isVector(),
@@ -347,58 +347,58 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.sext(BitWidth);
break;
}
- case TargetOpcode::G_SEXT_INREG: {
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
- Depth + 1);
- Known = Known.sextInReg(MI.getOperand(2).getImm());
- break;
- }
+ case TargetOpcode::G_SEXT_INREG: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.sextInReg(MI.getOperand(2).getImm());
+ break;
+ }
case TargetOpcode::G_ANYEXT: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- Known = Known.anyext(BitWidth);
+ Known = Known.anyext(BitWidth);
break;
}
case TargetOpcode::G_LOAD: {
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- if (const MDNode *Ranges = MMO->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
-
+
break;
}
case TargetOpcode::G_ZEXTLOAD: {
// Everything above the retrieved bits is zero
- Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
}
- case TargetOpcode::G_ASHR: {
- KnownBits LHSKnown, RHSKnown;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
- Depth + 1);
+ case TargetOpcode::G_ASHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
- Known = KnownBits::ashr(LHSKnown, RHSKnown);
- break;
- }
- case TargetOpcode::G_LSHR: {
- KnownBits LHSKnown, RHSKnown;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
- Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
- Depth + 1);
- Known = KnownBits::lshr(LHSKnown, RHSKnown);
- break;
- }
- case TargetOpcode::G_SHL: {
- KnownBits LHSKnown, RHSKnown;
- computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
- Depth + 1);
- computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Known = KnownBits::ashr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);
- Known = KnownBits::shl(LHSKnown, RHSKnown);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::lshr(LHSKnown, RHSKnown);
break;
}
+ case TargetOpcode::G_SHL: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::shl(LHSKnown, RHSKnown);
+ break;
+ }
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_PTRTOINT:
// Fall through and handle them the same as zext/trunc.
@@ -418,50 +418,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
- case TargetOpcode::G_MERGE_VALUES: {
- unsigned NumOps = MI.getNumOperands();
- unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
-
- for (unsigned I = 0; I != NumOps - 1; ++I) {
- KnownBits SrcOpKnown;
- computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
- DemandedElts, Depth + 1);
- Known.insertBits(SrcOpKnown, I * OpSize);
- }
- break;
- }
- case TargetOpcode::G_UNMERGE_VALUES: {
- unsigned NumOps = MI.getNumOperands();
- Register SrcReg = MI.getOperand(NumOps - 1).getReg();
- if (MRI.getType(SrcReg).isVector())
- return; // TODO: Handle vectors.
-
- KnownBits SrcOpKnown;
- computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
-
- // Figure out the result operand index
- unsigned DstIdx = 0;
- for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
- ++DstIdx)
- ;
-
- Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
- break;
- }
- case TargetOpcode::G_BSWAP: {
- Register SrcReg = MI.getOperand(1).getReg();
- computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
- Known.byteSwap();
- break;
- }
- case TargetOpcode::G_BITREVERSE: {
- Register SrcReg = MI.getOperand(1).getReg();
- computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
- Known.reverseBits();
- break;
- }
- }
-
+ case TargetOpcode::G_MERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+
+ for (unsigned I = 0; I != NumOps - 1; ++I) {
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
+ DemandedElts, Depth + 1);
+ Known.insertBits(SrcOpKnown, I * OpSize);
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ Register SrcReg = MI.getOperand(NumOps - 1).getReg();
+ if (MRI.getType(SrcReg).isVector())
+ return; // TODO: Handle vectors.
+
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+
+ // Figure out the result operand index
+ unsigned DstIdx = 0;
+ for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
+ ++DstIdx)
+ ;
+
+ Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+ break;
+ }
+ case TargetOpcode::G_BSWAP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.byteSwap();
+ break;
+ }
+ case TargetOpcode::G_BITREVERSE: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.reverseBits();
+ break;
+ }
+ }
+
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
LLVM_DEBUG(dumpResult(MI, Known, Depth));
@@ -469,17 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
ComputeKnownBitsCache[R] = Known;
}
-/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
-unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
- const APInt &DemandedElts,
- unsigned Depth) {
- // Test src1 first, since we canonicalize simpler expressions to the RHS.
- unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
- if (Src1SignBits == 1)
- return 1;
- return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
-}
-
+/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
+unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+ if (Src1SignBits == 1)
+ return 1;
+ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+}
+
unsigned GISelKnownBits::computeNumSignBits(Register R,
const APInt &DemandedElts,
unsigned Depth) {
@@ -523,31 +523,31 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
- case TargetOpcode::G_SEXT_INREG: {
- // Max of the input and what this extends.
- Register Src = MI.getOperand(1).getReg();
- unsigned SrcBits = MI.getOperand(2).getImm();
- unsigned InRegBits = TyBits - SrcBits + 1;
- return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
- }
+ case TargetOpcode::G_SEXT_INREG: {
+ // Max of the input and what this extends.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned SrcBits = MI.getOperand(2).getImm();
+ unsigned InRegBits = TyBits - SrcBits + 1;
+ return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
+ }
case TargetOpcode::G_SEXTLOAD: {
- // FIXME: We need an in-memory type representation.
- if (DstTy.isVector())
- return 1;
-
- // e.g. i16->i32 = '17' bits known.
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- return TyBits - MMO->getSizeInBits() + 1;
- }
- case TargetOpcode::G_ZEXTLOAD: {
- // FIXME: We need an in-memory type representation.
- if (DstTy.isVector())
- return 1;
-
- // e.g. i16->i32 = '16' bits known.
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- return TyBits - MMO->getSizeInBits();
- }
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '17' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits() + 1;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '16' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits();
+ }
case TargetOpcode::G_TRUNC: {
Register Src = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src);
@@ -560,11 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
}
- case TargetOpcode::G_SELECT: {
- return computeNumSignBitsMin(MI.getOperand(2).getReg(),
- MI.getOperand(3).getReg(), DemandedElts,
- Depth + 1);
- }
+ case TargetOpcode::G_SELECT: {
+ return computeNumSignBitsMin(MI.getOperand(2).getReg(),
+ MI.getOperand(3).getReg(), DemandedElts,
+ Depth + 1);
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b7883cbc31..c81add2e6b 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,11 +29,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -50,13 +50,13 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -74,7 +74,7 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
+#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
@@ -95,8 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -117,8 +117,8 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
- : MachineFunctionPass(ID), OptLevel(optlevel) {}
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+ : MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
namespace {
@@ -162,17 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
- auto VRegsIt = VMap.findVRegs(Val);
- if (VRegsIt != VMap.vregs_end())
- return *VRegsIt->second;
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
auto *Regs = VMap.getVRegs(Val);
auto *Offsets = VMap.getOffsets(Val);
SmallVector<LLT, 4> SplitTys;
@@ -234,9 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
- auto MapEntry = FrameIndices.find(&AI);
- if (MapEntry != FrameIndices.end())
- return MapEntry->second;
+ auto MapEntry = FrameIndices.find(&AI);
+ if (MapEntry != FrameIndices.end())
+ return MapEntry->second;
uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
uint64_t Size =
@@ -306,8 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
-bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
- MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
uint16_t Flags = 0;
@@ -315,14 +315,14 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
- MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
return true;
}
-bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
- return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
-}
-
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
auto *CI = dyn_cast<CmpInst>(&U);
@@ -368,289 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
-}
-
-void IRTranslator::emitBranchForMergedCondition(
- const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
- BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
- // If the leaf of the tree is a comparison, merge the condition into
- // the caseblock.
- if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
- CmpInst::Predicate Condition;
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
- } else {
- const FCmpInst *FC = cast<FCmpInst>(Cond);
- Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
- }
-
- SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
- BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
- CurBuilder->getDebugLoc(), TProb, FProb);
- SL->SwitchCases.push_back(CB);
- return;
- }
-
- // Create a CaseBlock record representing this branch.
- CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
- SwitchCG::CaseBlock CB(
- Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
- nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
- SL->SwitchCases.push_back(CB);
-}
-
-static bool isValInBlock(const Value *V, const BasicBlock *BB) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == BB;
- return true;
-}
-
-void IRTranslator::findMergedConditions(
- const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
- Instruction::BinaryOps Opc, BranchProbability TProb,
- BranchProbability FProb, bool InvertCond) {
- using namespace PatternMatch;
- assert((Opc == Instruction::And || Opc == Instruction::Or) &&
- "Expected Opc to be AND/OR");
- // Skip over not part of the tree and remember to invert op and operands at
- // next level.
- Value *NotCond;
- if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
- isValInBlock(NotCond, CurBB->getBasicBlock())) {
- findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
- !InvertCond);
- return;
- }
-
- const Instruction *BOp = dyn_cast<Instruction>(Cond);
- const Value *BOpOp0, *BOpOp1;
- // Compute the effective opcode for Cond, taking into account whether it needs
- // to be inverted, e.g.
- // and (not (or A, B)), C
- // gets lowered as
- // and (and (not A, not B), C)
- Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
- if (BOp) {
- BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
- ? Instruction::And
- : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
- ? Instruction::Or
- : (Instruction::BinaryOps)0);
- if (InvertCond) {
- if (BOpc == Instruction::And)
- BOpc = Instruction::Or;
- else if (BOpc == Instruction::Or)
- BOpc = Instruction::And;
- }
- }
-
- // If this node is not part of the or/and tree, emit it as a branch.
- // Note that all nodes in the tree should have same opcode.
- bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
- if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
- !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
- !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
- emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
- InvertCond);
- return;
- }
-
- // Create TmpBB after CurBB.
- MachineFunction::iterator BBI(CurBB);
- MachineBasicBlock *TmpBB =
- MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
- CurBB->getParent()->insert(++BBI, TmpBB);
-
- if (Opc == Instruction::Or) {
- // Codegen X | Y as:
- // BB1:
- // jmp_if_X TBB
- // jmp TmpBB
- // TmpBB:
- // jmp_if_Y TBB
- // jmp FBB
- //
-
- // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
- // The requirement is that
- // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
- // = TrueProb for original BB.
- // Assuming the original probabilities are A and B, one choice is to set
- // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
- // A/(1+B) and 2B/(1+B). This choice assumes that
- // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
- // Another choice is to assume TrueProb for BB1 equals to TrueProb for
- // TmpBB, but the math is more complicated.
-
- auto NewTrueProb = TProb / 2;
- auto NewFalseProb = TProb / 2 + FProb;
- // Emit the LHS condition.
- findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
- NewFalseProb, InvertCond);
-
- // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
- SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
- BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
- // Emit the RHS condition into TmpBB.
- findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
- Probs[1], InvertCond);
- } else {
- assert(Opc == Instruction::And && "Unknown merge op!");
- // Codegen X & Y as:
- // BB1:
- // jmp_if_X TmpBB
- // jmp FBB
- // TmpBB:
- // jmp_if_Y TBB
- // jmp FBB
- //
- // This requires creation of TmpBB after CurBB.
-
- // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
- // The requirement is that
- // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
- // = FalseProb for original BB.
- // Assuming the original probabilities are A and B, one choice is to set
- // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
- // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
- // TrueProb for BB1 * FalseProb for TmpBB.
-
- auto NewTrueProb = TProb + FProb / 2;
- auto NewFalseProb = FProb / 2;
- // Emit the LHS condition.
- findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
- NewFalseProb, InvertCond);
-
- // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
- SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
- BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
- // Emit the RHS condition into TmpBB.
- findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
- Probs[1], InvertCond);
- }
-}
-
-bool IRTranslator::shouldEmitAsBranches(
- const std::vector<SwitchCG::CaseBlock> &Cases) {
- // For multiple cases, it's better to emit as branches.
- if (Cases.size() != 2)
- return true;
-
- // If this is two comparisons of the same values or'd or and'd together, they
- // will get folded into a single comparison, so don't emit two blocks.
- if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
- Cases[0].CmpRHS == Cases[1].CmpRHS) ||
- (Cases[0].CmpRHS == Cases[1].CmpLHS &&
- Cases[0].CmpLHS == Cases[1].CmpRHS)) {
- return false;
- }
-
- // Handle: (X != null) | (Y != null) --> (X|Y) != 0
- // Handle: (X == null) & (Y == null) --> (X|Y) == 0
- if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
- Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
- isa<Constant>(Cases[0].CmpRHS) &&
- cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
- if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
- Cases[0].TrueBB == Cases[1].ThisBB)
- return false;
- if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
- Cases[0].FalseBB == Cases[1].ThisBB)
- return false;
- }
-
- return true;
-}
-
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+}
+
+void IRTranslator::emitBranchForMergedCondition(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ CmpInst::Predicate Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ }
+
+ SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+ BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+ CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ SwitchCG::CaseBlock CB(
+ Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+void IRTranslator::findMergedConditions(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond) {
+ using namespace PatternMatch;
+ assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+ "Expected Opc to be AND/OR");
+ // Skip over not part of the tree and remember to invert op and operands at
+ // next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ isValInBlock(NotCond, CurBB->getBasicBlock())) {
+ findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+ if (BOp) {
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ // Note that all nodes in the tree should have same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
+ emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+ InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineBasicBlock *TmpBB =
+ MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ }
+}
+
+bool IRTranslator::shouldEmitAsBranches(
+ const std::vector<SwitchCG::CaseBlock> &Cases) {
+ // For multiple cases, it's better to emit as branches.
+ if (Cases.size() != 2)
+ return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+ Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+ Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
const BranchInst &BrInst = cast<BranchInst>(U);
- auto &CurMBB = MIRBuilder.getMBB();
- auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
-
- if (BrInst.isUnconditional()) {
- // If the unconditional target is the layout successor, fallthrough.
- if (!CurMBB.isLayoutSuccessor(Succ0MBB))
- MIRBuilder.buildBr(*Succ0MBB);
-
- // Link successors.
- for (const BasicBlock *Succ : successors(&BrInst))
- CurMBB.addSuccessor(&getMBB(*Succ));
- return true;
- }
-
- // If this condition is one of the special cases we handle, do special stuff
- // now.
- const Value *CondVal = BrInst.getCondition();
- MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
-
- const auto &TLI = *MF->getSubtarget().getTargetLowering();
-
- // If this is a series of conditions that are or'd or and'd together, emit
- // this as a sequence of branches instead of setcc's with and/or operations.
- // As long as jumps are not expensive (exceptions for multi-use logic ops,
- // unpredictable branches, and vector extracts because those jumps are likely
- // expensive for any target), this should improve performance.
- // For example, instead of something like:
- // cmp A, B
- // C = seteq
- // cmp D, E
- // F = setle
- // or C, F
- // jnz foo
- // Emit:
- // cmp A, B
- // je foo
- // cmp D, E
- // jle foo
- using namespace PatternMatch;
- const Instruction *CondI = dyn_cast<Instruction>(CondVal);
- if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
- !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
- Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
- Value *Vec;
- const Value *BOp0, *BOp1;
- if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
- Opcode = Instruction::And;
- else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
- Opcode = Instruction::Or;
-
- if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
- findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
- getEdgeProbability(&CurMBB, Succ0MBB),
- getEdgeProbability(&CurMBB, Succ1MBB),
- /*InvertCond=*/false);
- assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
-
- // Allow some cases to be rejected.
- if (shouldEmitAsBranches(SL->SwitchCases)) {
- // Emit the branch for this block.
- emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
- SL->SwitchCases.erase(SL->SwitchCases.begin());
- return true;
- }
-
- // Okay, we decided not to do this, remove any inserted MBB's and clear
- // SwitchCases.
- for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
- MF->erase(SL->SwitchCases[I].ThisBB);
-
- SL->SwitchCases.clear();
- }
- }
-
- // Create a CaseBlock record representing this branch.
- SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
- ConstantInt::getTrue(MF->getFunction().getContext()),
- nullptr, Succ0MBB, Succ1MBB, &CurMBB,
- CurBuilder->getDebugLoc());
-
- // Use emitSwitchCase to actually insert the fast branch sequence for this
- // cond branch.
- emitSwitchCase(CB, &CurMBB, *CurBuilder);
+ auto &CurMBB = MIRBuilder.getMBB();
+ auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+ if (BrInst.isUnconditional()) {
+ // If the unconditional target is the layout successor, fallthrough.
+ if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ MIRBuilder.buildBr(*Succ0MBB);
+
+ // Link successors.
+ for (const BasicBlock *Succ : successors(&BrInst))
+ CurMBB.addSuccessor(&getMBB(*Succ));
+ return true;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = BrInst.getCondition();
+ MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
+
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ using namespace PatternMatch;
+ const Instruction *CondI = dyn_cast<Instruction>(CondVal);
+ if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+ !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+ getEdgeProbability(&CurMBB, Succ0MBB),
+ getEdgeProbability(&CurMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (shouldEmitAsBranches(SL->SwitchCases)) {
+ // Emit the branch for this block.
+ emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return true;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+ MF->erase(SL->SwitchCases[I].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+ ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+ CurBuilder->getDebugLoc());
+
+ // Use emitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ emitSwitchCase(CB, &CurMBB, *CurBuilder);
return true;
}
@@ -715,7 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
- SL->findBitTestClusters(Clusters, &SI);
+ SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -836,22 +836,22 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
const LLT i1Ty = LLT::scalar(1);
// Build the compare.
if (!CB.CmpMHS) {
- const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
- // For conditional branch lowering, we might try to do something silly like
- // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
- // just re-use the existing condition vreg.
- if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
- CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
- Cond = CondLHS;
- } else {
- Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
- if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
- Cond =
- MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
- else
- Cond =
- MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
- }
+ const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+ // For conditional branch lowering, we might try to do something silly like
+ // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+ // just re-use the existing condition vreg.
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
+ CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ Cond = CondLHS;
+ } else {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+ Cond =
+ MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ else
+ Cond =
+ MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ }
} else {
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
@@ -884,8 +884,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
CB.ThisBB->normalizeSuccProbs();
- addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
- CB.ThisBB);
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
MIB.buildBrCond(Cond, *CB.TrueBB);
MIB.buildBr(*CB.FalseBB);
@@ -998,156 +998,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
return true;
}
-void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
- MachineBasicBlock *SwitchBB) {
- MachineIRBuilder &MIB = *CurBuilder;
- MIB.setMBB(*SwitchBB);
-
- // Subtract the minimum value.
- Register SwitchOpReg = getOrCreateVReg(*B.SValue);
-
- LLT SwitchOpTy = MRI->getType(SwitchOpReg);
- Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
- auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
-
- // Ensure that the type will fit the mask value.
- LLT MaskTy = SwitchOpTy;
- for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
- if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
- // Switch table case range are encoded into series of masks.
- // Just use pointer type, it's guaranteed to fit.
- MaskTy = LLT::scalar(64);
- break;
- }
- }
- Register SubReg = RangeSub.getReg(0);
- if (SwitchOpTy != MaskTy)
- SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
-
- B.RegVT = getMVTForLLT(MaskTy);
- B.Reg = SubReg;
-
- MachineBasicBlock *MBB = B.Cases[0].ThisBB;
-
- if (!B.OmitRangeCheck)
- addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
- addSuccessorWithProb(SwitchBB, MBB, B.Prob);
-
- SwitchBB->normalizeSuccProbs();
-
- if (!B.OmitRangeCheck) {
- // Conditional branch to the default block.
- auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
- auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
- RangeSub, RangeCst);
- MIB.buildBrCond(RangeCmp, *B.Default);
- }
-
- // Avoid emitting unnecessary branches to the next block.
- if (MBB != SwitchBB->getNextNode())
- MIB.buildBr(*MBB);
-}
-
-void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
- MachineBasicBlock *NextMBB,
- BranchProbability BranchProbToNext,
- Register Reg, SwitchCG::BitTestCase &B,
- MachineBasicBlock *SwitchBB) {
- MachineIRBuilder &MIB = *CurBuilder;
- MIB.setMBB(*SwitchBB);
-
- LLT SwitchTy = getLLTForMVT(BB.RegVT);
- Register Cmp;
- unsigned PopCount = countPopulation(B.Mask);
- if (PopCount == 1) {
- // Testing for a single bit; just compare the shift count with what it
- // would need to be to shift a 1 bit in that position.
- auto MaskTrailingZeros =
- MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
- Cmp =
- MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
- .getReg(0);
- } else if (PopCount == BB.Range) {
- // There is only one zero bit in the range, test for it directly.
- auto MaskTrailingOnes =
- MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
- Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
- .getReg(0);
- } else {
- // Make desired shift.
- auto CstOne = MIB.buildConstant(SwitchTy, 1);
- auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
-
- // Emit bit tests and jumps.
- auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
- auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
- auto CstZero = MIB.buildConstant(SwitchTy, 0);
- Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
- .getReg(0);
- }
-
- // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
- addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
- // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
- addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
- // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
- // one as they are relative probabilities (and thus work more like weights),
- // and hence we need to normalize them to let the sum of them become one.
- SwitchBB->normalizeSuccProbs();
-
- // Record the fact that the IR edge from the header to the bit test target
- // will go through our new block. Neeeded for PHIs to have nodes added.
- addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
- SwitchBB);
-
- MIB.buildBrCond(Cmp, *B.TargetBB);
-
- // Avoid emitting unnecessary branches to the next block.
- if (NextMBB != SwitchBB->getNextNode())
- MIB.buildBr(*NextMBB);
-}
-
-bool IRTranslator::lowerBitTestWorkItem(
- SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
- MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
- MachineIRBuilder &MIB, MachineFunction::iterator BBI,
- BranchProbability DefaultProb, BranchProbability UnhandledProbs,
- SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable) {
- using namespace SwitchCG;
- MachineFunction *CurMF = SwitchMBB->getParent();
- // FIXME: Optimize away range check based on pivot comparisons.
- BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
- // The bit test blocks haven't been inserted yet; insert them here.
- for (BitTestCase &BTC : BTB->Cases)
- CurMF->insert(BBI, BTC.ThisBB);
-
- // Fill in fields of the BitTestBlock.
- BTB->Parent = CurMBB;
- BTB->Default = Fallthrough;
-
- BTB->DefaultProb = UnhandledProbs;
- // If the cases in bit test don't form a contiguous range, we evenly
- // distribute the probability on the edge to Fallthrough to two
- // successors of CurMBB.
- if (!BTB->ContiguousRange) {
- BTB->Prob += DefaultProb / 2;
- BTB->DefaultProb -= DefaultProb / 2;
- }
-
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
-
- // If we're in the right place, emit the bit test header right now.
- if (CurMBB == SwitchMBB) {
- emitBitTestHeader(*BTB, SwitchMBB);
- BTB->Emitted = true;
- }
- return true;
-}
-
+void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ // Subtract the minimum value.
+ Register SwitchOpReg = getOrCreateVReg(*B.SValue);
+
+ LLT SwitchOpTy = MRI->getType(SwitchOpReg);
+ Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
+ auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
+
+ // Ensure that the type will fit the mask value.
+ LLT MaskTy = SwitchOpTy;
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use a 64-bit scalar type, it's guaranteed to fit.
+ MaskTy = LLT::scalar(64);
+ break;
+ }
+ }
+ Register SubReg = RangeSub.getReg(0);
+ if (SwitchOpTy != MaskTy)
+ SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
+
+ B.RegVT = getMVTForLLT(MaskTy);
+ B.Reg = SubReg;
+
+ MachineBasicBlock *MBB = B.Cases[0].ThisBB;
+
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+
+ SwitchBB->normalizeSuccProbs();
+
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
+ auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
+ RangeSub, RangeCst);
+ MIB.buildBrCond(RangeCmp, *B.Default);
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != SwitchBB->getNextNode())
+ MIB.buildBr(*MBB);
+}
+
+void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
+ MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext,
+ Register Reg, SwitchCG::BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ LLT SwitchTy = getLLTForMVT(BB.RegVT);
+ Register Cmp;
+ unsigned PopCount = countPopulation(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ auto MaskTrailingZeros =
+ MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ Cmp =
+ MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
+ .getReg(0);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ auto MaskTrailingOnes =
+ MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
+ .getReg(0);
+ } else {
+ // Make desired shift.
+ auto CstOne = MIB.buildConstant(SwitchTy, 1);
+ auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
+
+ // Emit bit tests and jumps.
+ auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
+ auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
+ auto CstZero = MIB.buildConstant(SwitchTy, 0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
+ .getReg(0);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
+ // one as they are relative probabilities (and thus work more like weights),
+ // and hence we need to normalize them to let the sum of them become one.
+ SwitchBB->normalizeSuccProbs();
+
+ // Record the fact that the IR edge from the header to the bit test target
+ // will go through our new block. Needed for PHIs to have nodes added.
+ addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
+ SwitchBB);
+
+ MIB.buildBrCond(Cmp, *B.TargetBB);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != SwitchBB->getNextNode())
+ MIB.buildBr(*NextMBB);
+}
+
+bool IRTranslator::lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough to two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ emitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ return true;
+}
+
bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
Value *Cond,
MachineBasicBlock *SwitchMBB,
@@ -1208,15 +1208,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
switch (I->Kind) {
case CC_BitTests: {
- if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
- DefaultProb, UnhandledProbs, I, Fallthrough,
- FallthroughUnreachable)) {
- LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
- return false;
- }
- break;
+ if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ DefaultProb, UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
+ return false;
+ }
+ break;
}
-
+
case CC_JumpTable: {
if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
UnhandledProbs, I, Fallthrough,
@@ -1557,34 +1557,34 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
- unsigned Opcode) {
+ unsigned Opcode) {
// If the source is undef, then just emit a nop.
if (isa<UndefValue>(CI.getArgOperand(1)))
return true;
- SmallVector<Register, 3> SrcRegs;
-
- unsigned MinPtrSize = UINT_MAX;
- for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
- Register SrcReg = getOrCreateVReg(**AI);
- LLT SrcTy = MRI->getType(SrcReg);
- if (SrcTy.isPointer())
- MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
- SrcRegs.push_back(SrcReg);
- }
-
- LLT SizeTy = LLT::scalar(MinPtrSize);
-
- // The size operand should be the minimum of the pointer sizes.
- Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
- if (MRI->getType(SizeOpReg) != SizeTy)
- SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
-
- auto ICall = MIRBuilder.buildInstr(Opcode);
- for (Register SrcReg : SrcRegs)
- ICall.addUse(SrcReg);
-
+ SmallVector<Register, 3> SrcRegs;
+
+ unsigned MinPtrSize = UINT_MAX;
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
+ Register SrcReg = getOrCreateVReg(**AI);
+ LLT SrcTy = MRI->getType(SrcReg);
+ if (SrcTy.isPointer())
+ MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
+ SrcRegs.push_back(SrcReg);
+ }
+
+ LLT SizeTy = LLT::scalar(MinPtrSize);
+
+ // The size operand should be the minimum of the pointer sizes.
+ Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
+ if (MRI->getType(SizeOpReg) != SizeTy)
+ SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
+
+ auto ICall = MIRBuilder.buildInstr(Opcode);
+ for (Register SrcReg : SrcRegs)
+ ICall.addUse(SrcReg);
+
Align DstAlign;
Align SrcAlign;
unsigned IsVol =
@@ -1612,7 +1612,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(0)),
MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
- if (Opcode != TargetOpcode::G_MEMSET)
+ if (Opcode != TargetOpcode::G_MEMSET)
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(1)),
MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
@@ -1651,16 +1651,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
return true;
}
-bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
- MachineIRBuilder &MIRBuilder) {
- Register Dst = getOrCreateVReg(CI);
- Register Src0 = getOrCreateVReg(*CI.getOperand(0));
- Register Src1 = getOrCreateVReg(*CI.getOperand(1));
- uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
- MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
- return true;
-}
-
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ Register Dst = getOrCreateVReg(CI);
+ Register Src0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Src1 = getOrCreateVReg(*CI.getOperand(1));
+ uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+ MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+ return true;
+}
+
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
default:
@@ -1711,14 +1711,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
return TargetOpcode::G_FPOW;
- case Intrinsic::powi:
- return TargetOpcode::G_FPOWI;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
case Intrinsic::rint:
return TargetOpcode::G_FRINT;
case Intrinsic::round:
return TargetOpcode::G_INTRINSIC_ROUND;
- case Intrinsic::roundeven:
- return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
case Intrinsic::sin:
return TargetOpcode::G_FSIN;
case Intrinsic::sqrt:
@@ -1729,31 +1729,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_READCYCLECOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
- case Intrinsic::lrint:
- return TargetOpcode::G_INTRINSIC_LRINT;
- // FADD/FMUL require checking the FMF, so are handled elsewhere.
- case Intrinsic::vector_reduce_fmin:
- return TargetOpcode::G_VECREDUCE_FMIN;
- case Intrinsic::vector_reduce_fmax:
- return TargetOpcode::G_VECREDUCE_FMAX;
- case Intrinsic::vector_reduce_add:
- return TargetOpcode::G_VECREDUCE_ADD;
- case Intrinsic::vector_reduce_mul:
- return TargetOpcode::G_VECREDUCE_MUL;
- case Intrinsic::vector_reduce_and:
- return TargetOpcode::G_VECREDUCE_AND;
- case Intrinsic::vector_reduce_or:
- return TargetOpcode::G_VECREDUCE_OR;
- case Intrinsic::vector_reduce_xor:
- return TargetOpcode::G_VECREDUCE_XOR;
- case Intrinsic::vector_reduce_smax:
- return TargetOpcode::G_VECREDUCE_SMAX;
- case Intrinsic::vector_reduce_smin:
- return TargetOpcode::G_VECREDUCE_SMIN;
- case Intrinsic::vector_reduce_umax:
- return TargetOpcode::G_VECREDUCE_UMAX;
- case Intrinsic::vector_reduce_umin:
- return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
}
return Intrinsic::not_intrinsic;
}
@@ -1846,7 +1846,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Get the underlying objects for the location passed on the lifetime
// marker.
SmallVector<const Value *, 4> Allocas;
- getUnderlyingObjects(CI.getArgOperand(1), Allocas);
+ getUnderlyingObjects(CI.getArgOperand(1), Allocas);
// Iterate over each underlying object, creating lifetime markers for each
// static alloca. Quit if we find a non-static alloca.
@@ -1960,37 +1960,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
case Intrinsic::ssub_sat:
return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
- case Intrinsic::ushl_sat:
- return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
- case Intrinsic::sshl_sat:
- return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
- case Intrinsic::umin:
- return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
- case Intrinsic::umax:
- return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
- case Intrinsic::smin:
- return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
- case Intrinsic::smax:
- return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
- case Intrinsic::abs:
- // TODO: Preserve "int min is poison" arg in GMIR?
- return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
- case Intrinsic::smul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
- case Intrinsic::umul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
- case Intrinsic::smul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
- case Intrinsic::umul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
- case Intrinsic::sdiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
- case Intrinsic::udiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
- case Intrinsic::sdiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
- case Intrinsic::udiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::ushl_sat:
+ return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
+ case Intrinsic::sshl_sat:
+ return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
+ case Intrinsic::umin:
+ return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
+ case Intrinsic::umax:
+ return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
+ case Intrinsic::smin:
+ return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
+ case Intrinsic::smax:
+ return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
+ case Intrinsic::abs:
+ // TODO: Preserve "int min is poison" arg in GMIR?
+ return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
+ case Intrinsic::smul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+ case Intrinsic::umul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+ case Intrinsic::smul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::umul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+ case Intrinsic::udiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::udiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -2014,24 +2014,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
return true;
}
- case Intrinsic::convert_from_fp16:
- // FIXME: This intrinsic should probably be removed from the IR.
- MIRBuilder.buildFPExt(getOrCreateVReg(CI),
- getOrCreateVReg(*CI.getArgOperand(0)),
- MachineInstr::copyFlagsFromInstruction(CI));
- return true;
- case Intrinsic::convert_to_fp16:
- // FIXME: This intrinsic should probably be removed from the IR.
- MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
- getOrCreateVReg(*CI.getArgOperand(0)),
- MachineInstr::copyFlagsFromInstruction(CI));
- return true;
+ case Intrinsic::convert_from_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ case Intrinsic::convert_to_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
case Intrinsic::memcpy:
- return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
case Intrinsic::memmove:
- return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
case Intrinsic::memset:
- return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
@@ -2114,18 +2114,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
case Intrinsic::invariant_end:
return true;
- case Intrinsic::expect:
- case Intrinsic::annotation:
- case Intrinsic::ptr_annotation:
- case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group: {
- // Drop the intrinsic, but forward the value.
- MIRBuilder.buildCopy(getOrCreateVReg(CI),
- getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- }
+ case Intrinsic::expect:
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group: {
+ // Drop the intrinsic, but forward the value.
+ MIRBuilder.buildCopy(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
case Intrinsic::assume:
- case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
@@ -2145,68 +2145,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
- case Intrinsic::localescape: {
- MachineBasicBlock &EntryMBB = MF->front();
- StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
-
- // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
- // is the same on all targets.
- for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
- Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
- if (isa<ConstantPointerNull>(Arg))
- continue; // Skip null pointers. They represent a hole in index space.
-
- int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
- MCSymbol *FrameAllocSym =
- MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
- Idx);
-
- // This should be inserted at the start of the entry block.
- auto LocalEscape =
- MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
- .addSym(FrameAllocSym)
- .addFrameIndex(FI);
-
- EntryMBB.insert(EntryMBB.begin(), LocalEscape);
- }
-
- return true;
- }
- case Intrinsic::vector_reduce_fadd:
- case Intrinsic::vector_reduce_fmul: {
- // Need to check for the reassoc flag to decide whether we want a
- // sequential reduction opcode or not.
- Register Dst = getOrCreateVReg(CI);
- Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
- Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
- unsigned Opc = 0;
- if (!CI.hasAllowReassoc()) {
- // The sequential ordering case.
- Opc = ID == Intrinsic::vector_reduce_fadd
- ? TargetOpcode::G_VECREDUCE_SEQ_FADD
- : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
- MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
- MachineInstr::copyFlagsFromInstruction(CI));
- return true;
- }
- // We split the operation into a separate G_FADD/G_FMUL + the reduce,
- // since the associativity doesn't matter.
- unsigned ScalarOpc;
- if (ID == Intrinsic::vector_reduce_fadd) {
- Opc = TargetOpcode::G_VECREDUCE_FADD;
- ScalarOpc = TargetOpcode::G_FADD;
- } else {
- Opc = TargetOpcode::G_VECREDUCE_FMUL;
- ScalarOpc = TargetOpcode::G_FMUL;
- }
- LLT DstTy = MRI->getType(Dst);
- auto Rdx = MIRBuilder.buildInstr(
- Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
- MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
- MachineInstr::copyFlagsFromInstruction(CI));
-
- return true;
- }
+ case Intrinsic::localescape: {
+ MachineBasicBlock &EntryMBB = MF->front();
+ StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+
+ int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
+ MCSymbol *FrameAllocSym =
+ MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
+ Idx);
+
+ // This should be inserted at the start of the entry block.
+ auto LocalEscape =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+
+ EntryMBB.insert(EntryMBB.begin(), LocalEscape);
+ }
+
+ return true;
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ // Need to check for the reassoc flag to decide whether we want a
+ // sequential reduction opcode or not.
+ Register Dst = getOrCreateVReg(CI);
+ Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
+ Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
+ unsigned Opc = 0;
+ if (!CI.hasAllowReassoc()) {
+ // The sequential ordering case.
+ Opc = ID == Intrinsic::vector_reduce_fadd
+ ? TargetOpcode::G_VECREDUCE_SEQ_FADD
+ : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
+ // We split the operation into a separate G_FADD/G_FMUL + the reduce,
+ // since the associativity doesn't matter.
+ unsigned ScalarOpc;
+ if (ID == Intrinsic::vector_reduce_fadd) {
+ Opc = TargetOpcode::G_VECREDUCE_FADD;
+ ScalarOpc = TargetOpcode::G_FADD;
+ } else {
+ Opc = TargetOpcode::G_VECREDUCE_FMUL;
+ ScalarOpc = TargetOpcode::G_FMUL;
+ }
+ LLT DstTy = MRI->getType(Dst);
+ auto Rdx = MIRBuilder.buildInstr(
+ Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
+ MachineInstr::copyFlagsFromInstruction(CI));
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2328,11 +2328,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
- } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
- auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
- if (!MDN) // This was probably an MDString.
- return false;
- MIB.addMetadata(MDN);
+ } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
+ if (!MDN) // This was probably an MDString.
+ return false;
+ MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
if (VRegs.size() > 1)
@@ -2357,62 +2357,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
-bool IRTranslator::findUnwindDestinations(
- const BasicBlock *EHPadBB,
- BranchProbability Prob,
- SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
- &UnwindDests) {
- EHPersonality Personality = classifyEHPersonality(
- EHPadBB->getParent()->getFunction().getPersonalityFn());
- bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
- bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
- bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
- bool IsSEH = isAsynchronousEHPersonality(Personality);
-
- if (IsWasmCXX) {
- // Ignore this for now.
- return false;
- }
-
- while (EHPadBB) {
- const Instruction *Pad = EHPadBB->getFirstNonPHI();
- BasicBlock *NewEHPadBB = nullptr;
- if (isa<LandingPadInst>(Pad)) {
- // Stop on landingpads. They are not funclets.
- UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
- break;
- }
- if (isa<CleanupPadInst>(Pad)) {
- // Stop on cleanup pads. Cleanups are always funclet entries for all known
- // personalities.
- UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
- UnwindDests.back().first->setIsEHScopeEntry();
- UnwindDests.back().first->setIsEHFuncletEntry();
- break;
- }
- if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
- // Add the catchpad handlers to the possible destinations.
- for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
- UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
- // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
- if (IsMSVCCXX || IsCoreCLR)
- UnwindDests.back().first->setIsEHFuncletEntry();
- if (!IsSEH)
- UnwindDests.back().first->setIsEHScopeEntry();
- }
- NewEHPadBB = CatchSwitch->getUnwindDest();
- } else {
- continue;
- }
-
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
- if (BPI && NewEHPadBB)
- Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
- EHPadBB = NewEHPadBB;
- }
- return true;
-}
-
+bool IRTranslator::findUnwindDestinations(
+ const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality = classifyEHPersonality(
+ EHPadBB->getParent()->getFunction().getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ // Ignore this for now.
+ return false;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ break;
+ }
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ }
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+ return true;
+}
+
bool IRTranslator::translateInvoke(const User &U,
MachineIRBuilder &MIRBuilder) {
const InvokeInst &I = cast<InvokeInst>(U);
@@ -2438,7 +2438,7 @@ bool IRTranslator::translateInvoke(const User &U,
return false;
// FIXME: support Windows exception handling.
- if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
+ if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
@@ -2452,26 +2452,26 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
- SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
- MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
- BranchProbability EHPadBBProb =
- BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
- : BranchProbability::getZero();
-
- if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
- return false;
-
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+
+ if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
+ return false;
+
MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
&ReturnMBB = getMBB(*ReturnBB);
- // Update successor info.
- addSuccessorWithProb(InvokeMBB, &ReturnMBB);
- for (auto &UnwindDest : UnwindDests) {
- UnwindDest.first->setIsEHPad();
- addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
- }
- InvokeMBB->normalizeSuccProbs();
-
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, &ReturnMBB);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
MIRBuilder.buildBr(ReturnMBB);
return true;
@@ -2511,12 +2511,12 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
- // If the unwinder does not preserve all registers, ensure that the
- // function marks the clobbered registers as used.
- const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
- if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
- MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
-
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
LLT Ty = getLLTForType(*LP.getType(), *DL);
Register Undef = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Undef);
@@ -2855,8 +2855,8 @@ bool IRTranslator::translate(const Instruction &Inst) {
// We only emit constants into the entry block from here. To prevent jumpy
// debug behaviour set the line to 0.
if (const DebugLoc &DL = Inst.getDebugLoc())
- EntryBuilder->setDebugLoc(DILocation::get(
- Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
+ EntryBuilder->setDebugLoc(DILocation::get(
+ Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
else
EntryBuilder->setDebugLoc(DebugLoc());
@@ -2934,57 +2934,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
void IRTranslator::finalizeBasicBlock() {
- for (auto &BTB : SL->BitTestCases) {
- // Emit header first, if it wasn't already emitted.
- if (!BTB.Emitted)
- emitBitTestHeader(BTB, BTB.Parent);
-
- BranchProbability UnhandledProb = BTB.Prob;
- for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
- UnhandledProb -= BTB.Cases[j].ExtraProb;
- // Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
- // If all cases cover a contiguous range, it is not necessary to jump to
- // the default block after the last bit test fails. This is because the
- // range check during bit test header creation has guaranteed that every
- // case here doesn't go outside the range. In this case, there is no need
- // to perform the last bit test, as it will always be true. Instead, make
- // the second-to-last bit-test fall through to the target of the last bit
- // test, and delete the last bit test.
-
- MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Second-to-last bit-test with contiguous range: fall through to the
- // target of the final bit test.
- NextMBB = BTB.Cases[j + 1].TargetBB;
- } else if (j + 1 == ej) {
- // For the last bit test, fall through to Default.
- NextMBB = BTB.Default;
- } else {
- // Otherwise, fall through to the next bit test.
- NextMBB = BTB.Cases[j + 1].ThisBB;
- }
-
- emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
-
- // FIXME delete this block below?
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Since we're not going to use the final bit test, remove it.
- BTB.Cases.pop_back();
- break;
- }
- }
- // This is "default" BB. We have two jumps to it. From "header" BB and from
- // last "case" BB, unless the latter was skipped.
- CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
- BTB.Default->getBasicBlock()};
- addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
- if (!BTB.ContiguousRange) {
- addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
- }
- }
- SL->BitTestCases.clear();
-
+ for (auto &BTB : SL->BitTestCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!BTB.Emitted)
+ emitBitTestHeader(BTB, BTB.Parent);
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the mbb we wish to insert the code into
+ MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
+
+ MachineBasicBlock *NextMBB;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
+
+ // FIXME delete this block below?
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+ // This is "default" BB. We have two jumps to it. From "header" BB and from
+ // last "case" BB, unless the latter was skipped.
+ CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
+ BTB.Default->getBasicBlock()};
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
+ }
+ }
+ SL->BitTestCases.clear();
+
for (auto &JTCase : SL->JTCases) {
// Emit header first, if it wasn't already emitted.
if (!JTCase.first.Emitted)
@@ -2993,10 +2993,10 @@ void IRTranslator::finalizeBasicBlock() {
emitJumpTable(JTCase.second, JTCase.second.MBB);
}
SL->JTCases.clear();
-
- for (auto &SwCase : SL->SwitchCases)
- emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
- SL->SwitchCases.clear();
+
+ for (auto &SwCase : SL->SwitchCases)
+ emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+ SL->SwitchCases.clear();
}
void IRTranslator::finalizeFunction() {
@@ -3058,24 +3058,24 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
- const TargetMachine &TM = MF->getTarget();
- TM.resetTargetOptions(F);
- EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
+ const TargetMachine &TM = MF->getTarget();
+ TM.resetTargetOptions(F);
+ EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
FuncInfo.MF = MF;
- if (EnableOpts)
- FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- else
- FuncInfo.BPI = nullptr;
-
- FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
-
+ if (EnableOpts)
+ FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo.BPI = nullptr;
+
+ FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
+
const auto &TLI = *MF->getSubtarget().getTargetLowering();
-
+
SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
SL->init(TLI, TM, *DL);
-
+
assert(PendingPHIs.empty() && "stale PHIs");
if (!DL->isLittleEndian()) {
@@ -3142,7 +3142,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index bb4d41cfd6..8bdf9f8862 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -562,11 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm(
}
unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
- if (OpInfo.Regs.front().isVirtual()) {
- // Put the register class of the virtual registers in the flag word.
- const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
- }
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
Inst.addImm(Flag);
if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
return false;
@@ -662,7 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint(
default:
return false;
case 'i': // Simple Integer or Relocatable Constant
- case 'n': // immediate integer with a known value.
+ case 'n': // immediate integer with a known value.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
assert(CI->getBitWidth() <= 64 &&
"expected immediate to fit into 64-bits");
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 25fae54871..bbd09edaf1 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -41,7 +41,7 @@ static cl::opt<std::string>
cl::desc("Record GlobalISel rule coverage files of this "
"prefix if instrumentation was generated"));
#else
-static const std::string CoveragePrefix;
+static const std::string CoveragePrefix;
#endif
char InstructionSelect::ID = 0;
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4fec9e628d..1f39b5bf2c 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -38,7 +38,7 @@ bool InstructionSelector::isOperandImmEqual(
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value.getSExtValue() == Value;
+ return VRegVal->Value.getSExtValue() == Value;
return false;
}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1993f60332..5d2979e053 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -10,17 +10,17 @@
//
//===----------------------------------------------------------------------===//
-// Enable optimizations to work around MSVC debug mode bug in 32-bit:
-// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
-// FIXME: Remove this when the issue is closed.
-#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)
-// We have to disable runtime checks in order to enable optimizations. This is
-// done for the entire file because the problem is actually observed in STL
-// template functions.
-#pragma runtime_checks("", off)
-#pragma optimize("gs", on)
-#endif
-
+// Enable optimizations to work around MSVC debug mode bug in 32-bit:
+// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
+// FIXME: Remove this when the issue is closed.
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)
+// We have to disable runtime checks in order to enable optimizations. This is
+// done for the entire file because the problem is actually observed in STL
+// template functions.
+#pragma runtime_checks("", off)
+#pragma optimize("gs", on)
+#endif
+
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;
@@ -35,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx,
std::initializer_list<LLT> TypesInit) {
SmallVector<LLT, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
- return llvm::is_contained(Types, Query.Types[TypeIdx]);
+ return llvm::is_contained(Types, Query.Types[TypeIdx]);
};
}
@@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet(
SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
- return llvm::is_contained(Types, Match);
+ return llvm::is_contained(Types, Match);
};
}
@@ -57,10 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
Query.MMODescrs[MMOIdx].SizeInBits,
Query.MMODescrs[MMOIdx].AlignInBits};
- return llvm::any_of(TypesAndMemDesc,
- [=](const TypePairAndMemDesc &Entry) -> bool {
- return Match.isCompatible(Entry);
- });
+ return llvm::any_of(TypesAndMemDesc,
+ [=](const TypePairAndMemDesc &Entry) -> bool {
+ return Match.isCompatible(Entry);
+ });
};
}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index f3ba3f0801..a5169a9239 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,16 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
};
}
-LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
- unsigned FromTypeIdx) {
- return [=](const LegalityQuery &Query) {
- const LLT OldTy = Query.Types[TypeIdx];
- const LLT NewTy = Query.Types[FromTypeIdx];
- const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
- return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
- };
-}
-
+LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp
index 5ba9367cac..c0629d955d 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
+ LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 66871ca3b9..995abb85d0 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -30,7 +30,7 @@
using namespace llvm;
using namespace LegalizeActions;
-using namespace MIPatternMatch;
+using namespace MIPatternMatch;
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
@@ -77,8 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
return Type::getFloatTy(Ctx);
case 64:
return Type::getDoubleTy(Ctx);
- case 80:
- return Type::getX86_FP80Ty(Ctx);
+ case 80:
+ return Type::getX86_FP80Ty(Ctx);
case 128:
return Type::getFP128Ty(Ctx);
default:
@@ -90,15 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
- LI(*MF.getSubtarget().getLegalizerInfo()),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ LI(*MF.getSubtarget().getLegalizerInfo()),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
-
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
+
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
@@ -240,20 +240,20 @@ void LegalizerHelper::insertParts(Register DstReg,
}
}
-/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
- const int StartIdx = Regs.size();
+ const int StartIdx = Regs.size();
const int NumResults = MI.getNumOperands() - 1;
- Regs.resize(Regs.size() + NumResults);
+ Regs.resize(Regs.size() + NumResults);
for (int I = 0; I != NumResults; ++I)
- Regs[StartIdx + I] = MI.getOperand(I).getReg();
+ Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
-void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
- LLT GCDTy, Register SrcReg) {
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+ LLT GCDTy, Register SrcReg) {
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy == GCDTy) {
// If the source already evenly divides the result type, we don't need to do
@@ -264,13 +264,13 @@ void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
getUnmergeResults(Parts, *Unmerge);
}
-}
+}
-LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
- LLT NarrowTy, Register SrcReg) {
- LLT SrcTy = MRI.getType(SrcReg);
- LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
- extractGCDType(Parts, GCDTy, SrcReg);
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+ extractGCDType(Parts, GCDTy, SrcReg);
return GCDTy;
}
@@ -384,14 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
if (LCMTy.isVector()) {
- unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
- SmallVector<Register, 8> UnmergeDefs(NumDefs);
- UnmergeDefs[0] = DstReg;
- for (unsigned I = 1; I != NumDefs; ++I)
- UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
-
- MIRBuilder.buildUnmerge(UnmergeDefs,
- MIRBuilder.buildMerge(LCMTy, RemergeRegs));
+ unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
+ SmallVector<Register, 8> UnmergeDefs(NumDefs);
+ UnmergeDefs[0] = DstReg;
+ for (unsigned I = 1; I != NumDefs; ++I)
+ UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
+
+ MIRBuilder.buildUnmerge(UnmergeDefs,
+ MIRBuilder.buildMerge(LCMTy, RemergeRegs));
return;
}
@@ -399,20 +399,20 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
-#define RTLIBCASE_INT(LibcallPrefix) \
- do { \
- switch (Size) { \
- case 32: \
- return RTLIB::LibcallPrefix##32; \
- case 64: \
- return RTLIB::LibcallPrefix##64; \
- case 128: \
- return RTLIB::LibcallPrefix##128; \
- default: \
- llvm_unreachable("unexpected size"); \
- } \
- } while (0)
-
+#define RTLIBCASE_INT(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
+
#define RTLIBCASE(LibcallPrefix) \
do { \
switch (Size) { \
@@ -420,8 +420,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
- case 80: \
- return RTLIB::LibcallPrefix##80; \
+ case 80: \
+ return RTLIB::LibcallPrefix##80; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
@@ -431,15 +431,15 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- RTLIBCASE_INT(SDIV_I);
+ RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
- RTLIBCASE_INT(UDIV_I);
+ RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
- RTLIBCASE_INT(SREM_I);
+ RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
- RTLIBCASE_INT(UREM_I);
+ RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- RTLIBCASE_INT(CTLZ_I);
+ RTLIBCASE_INT(CTLZ_I);
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
@@ -482,16 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- RTLIBCASE(ROUNDEVEN_F);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ RTLIBCASE(ROUNDEVEN_F);
}
llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
- MachineInstr &MI) {
+static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
+ MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
@@ -566,7 +566,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
SmallVector<CallLowering::ArgInfo, 3> Args;
// Add all the args, except for the last which is an imm denoting 'tail'.
- for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
+ for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
Register Reg = MI.getOperand(i).getReg();
// Need derive an IR type for call lowering.
@@ -582,14 +582,14 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
RTLIB::Libcall RTLibcall;
- switch (MI.getOpcode()) {
- case TargetOpcode::G_MEMCPY:
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_MEMCPY:
RTLibcall = RTLIB::MEMCPY;
break;
- case TargetOpcode::G_MEMMOVE:
- RTLibcall = RTLIB::MEMMOVE;
- break;
- case TargetOpcode::G_MEMSET:
+ case TargetOpcode::G_MEMMOVE:
+ RTLibcall = RTLIB::MEMMOVE;
+ break;
+ case TargetOpcode::G_MEMSET:
RTLibcall = RTLIB::MEMSET;
break;
default:
@@ -601,8 +601,8 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
- Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
- isLibCallInTailPosition(MIRBuilder.getTII(), MI);
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+ isLibCallInTailPosition(MIRBuilder.getTII(), MI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -695,11 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FRINT:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
- if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
- LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
@@ -748,14 +748,14 @@ LegalizerHelper::libcall(MachineInstr &MI) {
return Status;
break;
}
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMMOVE:
- case TargetOpcode::G_MEMSET: {
- LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
- MI.eraseFromParent();
- return Result;
- }
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+ MI.eraseFromParent();
+ return Result;
}
+ }
MI.eraseFromParent();
return Legalized;
@@ -935,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- auto &MMO = **MI.memoperands_begin();
+ auto &MMO = **MI.memoperands_begin();
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -959,15 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
- unsigned MemSize = MMO.getSizeInBits();
-
- if (MemSize == NarrowSize) {
+ unsigned MemSize = MMO.getSizeInBits();
+
+ if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- } else if (MemSize < NarrowSize) {
+ } else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
- } else if (MemSize > NarrowSize) {
- // FIXME: Need to split the load.
- return UnableToLegalize;
+ } else if (MemSize > NarrowSize) {
+ // FIXME: Need to split the load.
+ return UnableToLegalize;
}
if (ZExt)
@@ -1063,11 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
@@ -1248,7 +1248,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
@@ -1257,17 +1257,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_FPTOUI:
- case TargetOpcode::G_FPTOSI:
- return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_FPEXT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI:
+ return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FPEXT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
}
Register LegalizerHelper::coerceToScalar(Register Val) {
@@ -1328,7 +1328,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
+ MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
@@ -1496,40 +1496,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
-Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
- Register WideReg = MRI.createGenericVirtualRegister(WideTy);
- LLT OrigTy = MRI.getType(OrigReg);
- LLT LCMTy = getLCMType(WideTy, OrigTy);
-
- const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
- const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
-
- Register UnmergeSrc = WideReg;
-
- // Create a merge to the LCM type, padding with undef
- // %0:_(<3 x s32>) = G_FOO => <4 x s32>
- // =>
- // %1:_(<4 x s32>) = G_FOO
- // %2:_(<4 x s32>) = G_IMPLICIT_DEF
- // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
- // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
- if (NumMergeParts > 1) {
- Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
- SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
- MergeParts[0] = WideReg;
- UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
- }
-
- // Unmerge to the original register and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
- UnmergeResults[0] = OrigReg;
- for (int I = 1; I != NumUnmergeParts; ++I)
- UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
-
- MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
- return WideReg;
-}
-
+Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { // Returns a fresh WideTy vreg; OrigReg becomes result 0 of an unmerge fed from it.
+ Register WideReg = MRI.createGenericVirtualRegister(WideTy);
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT LCMTy = getLCMType(WideTy, OrigTy);
+
+ const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); // WideTy-sized pieces in one LCMTy value
+ const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); // OrigTy-sized pieces in one LCMTy value
+
+ Register UnmergeSrc = WideReg; // unmerged directly unless a merge up to LCMTy is needed below
+
+ // Create a merge to the LCM type, padding with undef
+ // %0:_(<3 x s32>) = G_FOO => <4 x s32>
+ // =>
+ // %1:_(<4 x s32>) = G_FOO
+ // %2:_(<4 x s32>) = G_IMPLICIT_DEF
+ // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
+ // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
+ if (NumMergeParts > 1) {
+ Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
+ MergeParts[0] = WideReg; // every other part keeps the shared undef value
+ UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
+
+ // Unmerge to the original register and pad with dead defs.
+ SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
+ UnmergeResults[0] = OrigReg;
+ for (int I = 1; I != NumUnmergeParts; ++I)
+ UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); // new vreg per padding result
+
+ MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
+ return WideReg; // nothing defines WideReg yet; the visible caller installs it as the instruction's def operand
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1599,60 +1599,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
- // Create a sequence of unmerges and merges to the original results. Since we
- // may have widened the source, we will need to pad the results with dead defs
- // to cover the source register.
- // e.g. widen s48 to s64:
- // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
+ // Create a sequence of unmerges and merges to the original results. Since we
+ // may have widened the source, we will need to pad the results with dead defs
+ // to cover the source register.
+ // e.g. widen s48 to s64:
+ // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
//
// =>
- // %4:_(s192) = G_ANYEXT %0:_(s96)
- // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
- // ; unpack to GCD type, with extra dead defs
- // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
- // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
- // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
- // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
- // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
- const LLT GCDTy = getGCDType(WideTy, DstTy);
+ // %4:_(s192) = G_ANYEXT %0:_(s96)
+ // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
+ // ; unpack to GCD type, with extra dead defs
+ // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
+ // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
+ // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
+ // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
+ // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
+ const LLT GCDTy = getGCDType(WideTy, DstTy);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
- const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
-
- // Directly unmerge to the destination without going through a GCD type
- // if possible
- if (PartsPerRemerge == 1) {
- const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
-
- for (int I = 0; I != NumUnmerge; ++I) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
-
- for (int J = 0; J != PartsPerUnmerge; ++J) {
- int Idx = I * PartsPerUnmerge + J;
- if (Idx < NumDst)
- MIB.addDef(MI.getOperand(Idx).getReg());
- else {
- // Create dead def for excess components.
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
- }
+ const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
+
+ // Directly unmerge to the destination without going through a GCD type
+ // if possible
+ if (PartsPerRemerge == 1) {
+ const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J) {
+ int Idx = I * PartsPerUnmerge + J;
+ if (Idx < NumDst)
+ MIB.addDef(MI.getOperand(Idx).getReg());
+ else {
+ // Create dead def for excess components.
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ }
}
-
- MIB.addUse(Unmerge.getReg(I));
- }
- } else {
- SmallVector<Register, 16> Parts;
- for (int J = 0; J != NumUnmerge; ++J)
- extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
-
- SmallVector<Register, 8> RemergeParts;
- for (int I = 0; I != NumDst; ++I) {
- for (int J = 0; J < PartsPerRemerge; ++J) {
- const int Idx = I * PartsPerRemerge + J;
- RemergeParts.emplace_back(Parts[Idx]);
- }
-
- MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
- RemergeParts.clear();
+
+ MIB.addUse(Unmerge.getReg(I));
}
+ } else {
+ SmallVector<Register, 16> Parts;
+ for (int J = 0; J != NumUnmerge; ++J)
+ extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
+
+ SmallVector<Register, 8> RemergeParts;
+ for (int I = 0; I != NumDst; ++I) {
+ for (int J = 0; J < PartsPerRemerge; ++J) {
+ const int Idx = I * PartsPerRemerge + J;
+ RemergeParts.emplace_back(Parts[Idx]);
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
+ RemergeParts.clear();
+ }
}
MI.eraseFromParent();
@@ -1702,7 +1702,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
Src = MIRBuilder.buildAnyExt(WideTy, Src);
ShiftTy = WideTy;
- }
+ }
auto LShr = MIRBuilder.buildLShr(
ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
@@ -1740,7 +1740,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx != 0 || WideTy.isVector())
+ if (TypeIdx != 0 || WideTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
@@ -1750,45 +1750,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
- unsigned Op = MI.getOpcode();
- unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
- ? TargetOpcode::G_ADD
- : TargetOpcode::G_SUB;
- unsigned ExtOpcode =
- Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
- ? TargetOpcode::G_ZEXT
- : TargetOpcode::G_SEXT;
- auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
- auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
- // Do the arithmetic in the larger type.
- auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
- LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
- auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
- auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
- // There is no overflow if the ExtOp is the same as NewOp.
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
- // Now trunc the NewOp to the original result.
- MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
+LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) { // Widens G_[US]ADDO / G_[US]SUBO: computes in WideTy, then derives the overflow flag from a trunc/re-extend round trip.
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO: widening type index 1 is not implemented
+ unsigned Op = MI.getOpcode();
+ unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
+ ? TargetOpcode::G_ADD // add-with-overflow opcodes
+ : TargetOpcode::G_SUB; // otherwise a subtract-with-overflow opcode
+ unsigned ExtOpcode =
+ Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
+ ? TargetOpcode::G_ZEXT // unsigned variants zero-extend
+ : TargetOpcode::G_SEXT; // signed variants sign-extend
+ auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); // operand 2: left-hand input
+ auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); // operand 3: right-hand input
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); // narrow back to the original result type...
+ auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); // ...and re-extend with the same signedness
+ // There is no overflow if the ExtOp is the same as NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); // operand 1 receives the overflow flag
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
+ MI.eraseFromParent(); // the original instruction is fully replaced by the sequence built above
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
- MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
- MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
- MI.getOpcode() == TargetOpcode::G_USHLSAT;
+ MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT;
// We can convert this to:
// 1. Any extend iN to iM
// 2. SHL by M-N
- // 3. [US][ADD|SUB|SHL]SAT
+ // 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
//
// It may be more efficient to lower this to a min and a max operation in
@@ -1799,14 +1799,14 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
unsigned NewBits = WideTy.getScalarSizeInBits();
unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
- // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
- // must not left shift the RHS to preserve the shift amount.
+ // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
+ // must not left shift the RHS to preserve the shift amount.
auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
- auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
- : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
+ auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
+ : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
- auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
+ auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
{ShiftL, ShiftR}, MI.getFlags());
@@ -1834,18 +1834,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
- case TargetOpcode::G_USUBO:
- return widenScalarAddoSubo(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_USUBO:
+ return widenScalarAddoSubo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
- case TargetOpcode::G_SSHLSAT:
+ case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
- case TargetOpcode::G_USHLSAT:
- return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_USHLSAT:
+ return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -2038,22 +2038,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
case TargetOpcode::G_SITOFP:
Observer.changingInstr(MI);
-
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- else
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
-
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
Observer.changingInstr(MI);
-
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- else
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
-
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
@@ -2069,7 +2069,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!Ty.isScalar())
+ if (!Ty.isScalar())
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -2267,7 +2267,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
assert(TypeIdx == 0);
Observer.changingInstr(MI);
@@ -2277,15 +2277,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_FPOWI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -2312,7 +2312,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Avoid changing the result vector type if the source element type was
// requested.
if (TypeIdx == 1) {
- MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
} else {
widenScalarDst(MI, WideTy, 0);
}
@@ -2415,377 +2415,377 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
return UnableToLegalize;
}
-/// Figure out the bit offset into a register when coercing a vector index for
-/// the wide element type. This is only for the case when promoting vector to
-/// one with larger elements.
-//
-///
-/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
-/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
-static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
- Register Idx,
- unsigned NewEltSize,
- unsigned OldEltSize) {
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- LLT IdxTy = B.getMRI()->getType(Idx);
-
- // Now figure out the amount we need to shift to get the target bits.
- auto OffsetMask = B.buildConstant(
- IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
- auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
- return B.buildShl(IdxTy, OffsetIdx,
- B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
-}
-
-/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
-/// is casting to a vector with a smaller element size, perform multiple element
-/// extracts and merge the results. If this is coercing to a vector with larger
-/// elements, index the bitcasted vector and extract the target element with bit
-/// operations. This is intended to force the indexing in the native register
-/// size for architectures that can dynamically index the register file.
-LegalizerHelper::LegalizeResult
-LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
- LLT CastTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Idx = MI.getOperand(2).getReg();
- LLT SrcVecTy = MRI.getType(SrcVec);
- LLT IdxTy = MRI.getType(Idx);
-
- LLT SrcEltTy = SrcVecTy.getElementType();
- unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
- unsigned OldNumElts = SrcVecTy.getNumElements();
-
- LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
- Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
-
- const unsigned NewEltSize = NewEltTy.getSizeInBits();
- const unsigned OldEltSize = SrcEltTy.getSizeInBits();
- if (NewNumElts > OldNumElts) {
- // Decreasing the vector element size
- //
- // e.g. i64 = extract_vector_elt x:v2i64, y:i32
- // =>
- // v4i32:castx = bitcast x:v2i64
- //
- // i64 = bitcast
- // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
- // (i32 (extract_vector_elt castx, (2 * y + 1)))
- //
- if (NewNumElts % OldNumElts != 0)
- return UnableToLegalize;
-
- // Type of the intermediate result vector.
- const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
- LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
-
- auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
-
- SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
- auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
-
- for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
- auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
- auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
- auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
- NewOps[I] = Elt.getReg(0);
- }
-
- auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
- MIRBuilder.buildBitcast(Dst, NewVec);
- MI.eraseFromParent();
- return Legalized;
- }
-
- if (NewNumElts < OldNumElts) {
- if (NewEltSize % OldEltSize != 0)
- return UnableToLegalize;
-
- // This only depends on powers of 2 because we use bit tricks to figure out
- // the bit offset we need to shift to get the target element. A general
- // expansion could emit division/multiply.
- if (!isPowerOf2_32(NewEltSize / OldEltSize))
- return UnableToLegalize;
-
- // Increasing the vector element size.
- // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
- //
- // =>
- //
- // %cast = G_BITCAST %vec
- // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
- // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
- // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
- // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
- // %elt_bits = G_LSHR %wide_elt, %offset_bits
- // %elt = G_TRUNC %elt_bits
-
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
-
- // Divide to get the index in the wider element type.
- auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
-
- Register WideElt = CastVec;
- if (CastTy.isVector()) {
- WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
- }
-
- // Compute the bit offset into the register of the target element.
- Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
-
- // Shift the wide element to get the target element.
- auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
- MIRBuilder.buildTrunc(Dst, ExtractedBits);
- MI.eraseFromParent();
- return Legalized;
- }
-
- return UnableToLegalize;
-}
-
-/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
-/// TargetReg, while preserving other bits in \p TargetReg.
-///
-/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
-static Register buildBitFieldInsert(MachineIRBuilder &B,
- Register TargetReg, Register InsertReg,
- Register OffsetBits) {
- LLT TargetTy = B.getMRI()->getType(TargetReg);
- LLT InsertTy = B.getMRI()->getType(InsertReg);
- auto ZextVal = B.buildZExt(TargetTy, InsertReg);
- auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
-
- // Produce a bitmask of the value to insert
- auto EltMask = B.buildConstant(
- TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
- InsertTy.getSizeInBits()));
- // Shift it into position
- auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
- auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
-
- // Clear out the bits in the wide element
- auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
-
- // The value to insert has all zeros already, so stick it into the masked
- // wide element.
- return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
-}
-
-/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
-/// is increasing the element size, perform the indexing in the target element
-/// type, and use bit operations to insert at the element position. This is
-/// intended for architectures that can dynamically index the register file and
-/// want to force indexing in the native register size.
-LegalizerHelper::LegalizeResult
-LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
- LLT CastTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Val = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(3).getReg();
-
- LLT VecTy = MRI.getType(Dst);
- LLT IdxTy = MRI.getType(Idx);
-
- LLT VecEltTy = VecTy.getElementType();
- LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
- const unsigned NewEltSize = NewEltTy.getSizeInBits();
- const unsigned OldEltSize = VecEltTy.getSizeInBits();
-
- unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
- unsigned OldNumElts = VecTy.getNumElements();
-
- Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
- if (NewNumElts < OldNumElts) {
- if (NewEltSize % OldEltSize != 0)
- return UnableToLegalize;
-
- // This only depends on powers of 2 because we use bit tricks to figure out
- // the bit offset we need to shift to get the target element. A general
- // expansion could emit division/multiply.
- if (!isPowerOf2_32(NewEltSize / OldEltSize))
- return UnableToLegalize;
-
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
-
- // Divide to get the index in the wider element type.
- auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
-
- Register ExtractedElt = CastVec;
- if (CastTy.isVector()) {
- ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
- }
-
- // Compute the bit offset into the register of the target element.
- Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
-
- Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
- Val, OffsetBits);
- if (CastTy.isVector()) {
- InsertedElt = MIRBuilder.buildInsertVectorElement(
- CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
- }
-
- MIRBuilder.buildBitcast(Dst, InsertedElt);
- MI.eraseFromParent();
- return Legalized;
- }
-
- return UnableToLegalize;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerLoad(MachineInstr &MI) {
- // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- Register DstReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- auto &MMO = **MI.memoperands_begin();
-
- if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
-
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
-
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
- auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
- *SmallMMO);
-
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
- MI.eraseFromParent();
- return Legalized;
- }
-
- MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
- MI.eraseFromParent();
- return Legalized;
- }
-
- if (DstTy.isScalar()) {
- Register TmpReg =
- MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
- MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case TargetOpcode::G_LOAD:
- MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
- break;
- case TargetOpcode::G_SEXTLOAD:
- MIRBuilder.buildSExt(DstReg, TmpReg);
- break;
- case TargetOpcode::G_ZEXTLOAD:
- MIRBuilder.buildZExt(DstReg, TmpReg);
- break;
- }
-
- MI.eraseFromParent();
- return Legalized;
- }
-
- return UnableToLegalize;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerStore(MachineInstr &MI) {
- // Lower a non-power of 2 store into multiple pow-2 stores.
- // E.g. split an i24 store into an i16 store + i8 store.
- // We do this by first extending the stored value to the next largest power
- // of 2 type, and then using truncating stores to store the components.
- // By doing this, likewise with G_LOAD, generate an extend that can be
- // artifact-combined away instead of leaving behind extracts.
- Register SrcReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- MachineMemOperand &MMO = **MI.memoperands_begin();
- if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
- return UnableToLegalize;
- if (SrcTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(SrcTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
- auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
-
- // Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
- uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
- auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
-
- // Generate the PtrAdd and truncating stores.
- LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
- MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
- MI.eraseFromParent();
- return Legalized;
-}
-
+/// Compute the bit offset of a narrow element inside the wide element that
+/// contains it, for a vector index \p Idx expressed in narrow elements. This is
+/// only for the case when bitcasting a vector to one with larger elements.
+/// Emits the following and returns the register holding %offset_bits:
+///
+/// %offset_idx = G_AND %idx, ~(-1 << Log2(NewEltSize / OldEltSize))
+/// %offset_bits = G_SHL %offset_idx, Log2(OldEltSize)
+static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
+ Register Idx,
+ unsigned NewEltSize,
+ unsigned OldEltSize) {
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); // log2 of the number of old elements per new element
+ LLT IdxTy = B.getMRI()->getType(Idx); // all arithmetic below is done in the index's own type
+
+ // Keep only the low Log2EltRatio bits of the index, i.e. the position of the
+ auto OffsetMask = B.buildConstant( // old element within its enclosing new element.
+ IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
+ return B.buildShl(IdxTy, OffsetIdx, // scale the sub-index to bits; presumably OldEltSize is a power of 2 - confirm with callers
+ B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
+}
+
+/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
+/// is casting to a vector with a smaller element size, perform multiple element
+/// extracts and merge the results. If this is coercing to a vector with larger
+/// elements, index the bitcasted vector and extract the target element with bit
+/// operations (intended to force the indexing in the native register size for
+/// targets that can dynamically index the register file). Erases \p MI on success.
LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize; // CastTy is only ever applied to the source vector operand below.
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Idx = MI.getOperand(2).getReg();
+ LLT SrcVecTy = MRI.getType(SrcVec);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT SrcEltTy = SrcVecTy.getElementType();
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; // a scalar CastTy counts as a single element
+ unsigned OldNumElts = SrcVecTy.getNumElements();
+
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); // NOTE(review): emitted before the checks below, so it is left behind on the UnableToLegalize paths
+
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = SrcEltTy.getSizeInBits();
+ if (NewNumElts > OldNumElts) {
+ // Decreasing the vector element size
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1)))
+ //
+ if (NewNumElts % OldNumElts != 0)
+ return UnableToLegalize; // each old element must map onto a whole number of new elements
+
+ // Type of the intermediate result vector.
+ const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
+ LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
+
+ auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
+
+ SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
+ auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); // index of the first new element covering the requested old element
+
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
+ auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
+ auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
+ NewOps[I] = Elt.getReg(0);
+ }
+
+ auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); // reassemble the pieces, then bitcast back to the original element type
+ MIRBuilder.buildBitcast(Dst, NewVec);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ // Increasing the vector element size.
+ // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
+ //
+ // =>
+ //
+ // %cast = G_BITCAST %vec
+ // %scaled_idx = G_LSHR %idx, Log2(NewEltSize / OldEltSize)
+ // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
+ // %offset_idx = G_AND %idx, ~(-1 << Log2(NewEltSize / OldEltSize))
+ // %offset_bits = G_SHL %offset_idx, Log2(OldEltSize)
+ // %elt_bits = G_LSHR %wide_elt, %offset_bits
+ // %elt = G_TRUNC %elt_bits
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register WideElt = CastVec; // when CastTy is a scalar the whole bitcast value is the wide element
+ if (CastTy.isVector()) {
+ WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ // Shift the wide element to get the target element.
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
+ MIRBuilder.buildTrunc(Dst, ExtractedBits);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize; // NewNumElts == OldNumElts is not handled here
+}
+
+/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
+/// OffsetBits, while preserving other bits in \p TargetReg. Returns the result.
+///
+/// (zext(InsertReg) << Offset) | (TargetReg & ~(LowBitsSet(InsertReg.size()) << Offset))
+static Register buildBitFieldInsert(MachineIRBuilder &B,
+ Register TargetReg, Register InsertReg,
+ Register OffsetBits) {
+ LLT TargetTy = B.getMRI()->getType(TargetReg);
+ LLT InsertTy = B.getMRI()->getType(InsertReg);
+ auto ZextVal = B.buildZExt(TargetTy, InsertReg); // widen to the target type so the shift cannot drop bits of the inserted value
+ auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
+
+ // Produce a bitmask of the value to insert
+ auto EltMask = B.buildConstant(
+ TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
+ InsertTy.getSizeInBits()));
+ // Shift it into position
+ auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
+ auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
+
+ // Clear out the bits in the wide element
+ auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
+
+ // The value to insert has all zeros already, so stick it into the masked
+ // wide element.
+ return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
+}
+
+/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
+/// is increasing the element size, perform the indexing in the target element
+/// type, and use bit operations to insert at the element position. This is
+/// intended for architectures that can dynamically index the register file and
+/// want to force indexing in the native register size. Erases \p MI on success.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize; // CastTy is only ever applied to the vector operand/result below.
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ Register Idx = MI.getOperand(3).getReg();
+
+ LLT VecTy = MRI.getType(Dst);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT VecEltTy = VecTy.getElementType();
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = VecEltTy.getSizeInBits();
+
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; // a scalar CastTy counts as a single element
+ unsigned OldNumElts = VecTy.getNumElements();
+
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); // NOTE(review): emitted before the checks below, so it is left behind on the UnableToLegalize paths
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register ExtractedElt = CastVec; // when CastTy is a scalar the whole bitcast value is the wide element
+ if (CastTy.isVector()) {
+ ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, // splice Val into the wide element at OffsetBits
+ Val, OffsetBits);
+ if (CastTy.isVector()) {
+ InsertedElt = MIRBuilder.buildInsertVectorElement( // write the updated wide element back at the scaled index
+ CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
+ }
+
+ MIRBuilder.buildBitcast(Dst, InsertedElt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize; // only the "fewer, wider elements" direction is implemented
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerLoad(MachineInstr &MI) {
+ // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT, or split a non-power-of-2 scalar G_LOAD in two.
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ auto &MMO = **MI.memoperands_begin(); // only the first memory operand is consulted
+
+ if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { // result is exactly as wide as the memory access
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(DstTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+ uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); // sizes and offsets are converted from bits to bytes
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ auto LargeLoad = MIRBuilder.buildLoadInstr( // zero-extended so its upper bits do not disturb the OR below
+ TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); // address of the bytes following the large piece
+ auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+ *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); // move the small piece above the large piece
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ MIRBuilder.buildLoad(DstReg, PtrReg, MMO); // non-G_LOAD opcode whose result is already memory-sized: no extension is needed
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isScalar()) { // sizes differ: load at memory width, then convert to the result type
+ Register TmpReg =
+ MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_LOAD:
+ MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ MIRBuilder.buildZExt(DstReg, TmpReg);
+ break;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize; // non-scalar result whose size differs from the memory size
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStore(MachineInstr &MI) {
+ // Lower a non-power of 2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+ // By doing this, likewise with G_LOAD, generate an extend that can be
+ // artifact-combined away instead of leaving behind extracts.
+ Register SrcReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **MI.memoperands_begin(); // only the first memory operand is consulted
+ if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+ return UnableToLegalize; // stores whose value and memory sizes differ are not handled here
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(SrcTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Extend to the next pow-2.
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+ // Generate the PtrAdd and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); // byte offset of the small piece
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); // sizes and offsets are converted from bits to bytes
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); // the narrower memory operand makes this store write only the low LargeSplitSize bits
+ MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD: {
@@ -2833,24 +2833,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
- return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
- case TargetOpcode::G_INSERT_VECTOR_ELT:
- return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
default:
return UnableToLegalize;
}
}
-// Legalize an instruction by changing the opcode in place.
-void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
- Observer.changingInstr(MI);
- MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
- Observer.changedInstr(MI);
-}
-
+// Legalize an instruction by changing the opcode in place, notifying the change observer before and after.
+void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
+ Observer.changingInstr(MI); // announce the pending mutation
+ MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); // swap the instruction descriptor; operands are left untouched
+ Observer.changedInstr(MI);
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
using namespace TargetOpcode;
switch(MI.getOpcode()) {
@@ -2860,7 +2860,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
auto Quot =
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
{MI.getOperand(1), MI.getOperand(2)});
@@ -2873,9 +2873,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
return lowerSADDO_SSUBO(MI);
- case TargetOpcode::G_UMULH:
- case TargetOpcode::G_SMULH:
- return lowerSMULH_UMULH(MI);
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_SMULH:
+ return lowerSMULH_UMULH(MI);
case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -2884,7 +2884,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Register Overflow = MI.getOperand(1).getReg();
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
- LLT Ty = MRI.getType(Res);
+ LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
? TargetOpcode::G_SMULH
@@ -2914,24 +2914,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
- Register Res = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Res);
-
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// TODO: Handle vector types once we are able to
// represent them.
if (Ty.isVector())
return UnableToLegalize;
- auto SignMask =
- MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
+ auto SignMask =
+ MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
Register SubByReg = MI.getOperand(1).getReg();
- MIRBuilder.buildXor(Res, SubByReg, SignMask);
+ MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FSUB: {
- Register Res = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Res);
-
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
// First, check if G_FNEG is marked as Lower. If so, we may
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
@@ -2951,12 +2951,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
- // Since round even is the assumed rounding mode for unconstrained FP
- // operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT);
- return Legalized;
- }
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ // Since round even is the assumed rounding mode for unconstrained FP
+ // operations, rint and roundeven are the same operation.
+ changeOpcode(MI, TargetOpcode::G_FRINT);
+ return Legalized;
+ }
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -2971,16 +2971,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
}
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
- return lowerLoad(MI);
- case TargetOpcode::G_STORE:
- return lowerStore(MI);
+ case TargetOpcode::G_ZEXTLOAD:
+ return lowerLoad(MI);
+ case TargetOpcode::G_STORE:
+ return lowerStore(MI);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTPOP:
- return lowerBitCount(MI);
+ return lowerBitCount(MI);
case G_UADDO: {
Register Res = MI.getOperand(0).getReg();
Register CarryOut = MI.getOperand(1).getReg();
@@ -3042,24 +3042,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_UITOFP:
- return lowerUITOFP(MI);
+ return lowerUITOFP(MI);
case G_SITOFP:
- return lowerSITOFP(MI);
+ return lowerSITOFP(MI);
case G_FPTOUI:
- return lowerFPTOUI(MI);
+ return lowerFPTOUI(MI);
case G_FPTOSI:
return lowerFPTOSI(MI);
case G_FPTRUNC:
- return lowerFPTRUNC(MI);
- case G_FPOWI:
- return lowerFPOWI(MI);
+ return lowerFPTRUNC(MI);
+ case G_FPOWI:
+ return lowerFPOWI(MI);
case G_SMIN:
case G_SMAX:
case G_UMIN:
case G_UMAX:
- return lowerMinMax(MI);
+ return lowerMinMax(MI);
case G_FCOPYSIGN:
- return lowerFCopySign(MI);
+ return lowerFCopySign(MI);
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
@@ -3082,9 +3082,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
MI.eraseFromParent();
return Legalized;
}
- case G_EXTRACT_VECTOR_ELT:
- case G_INSERT_VECTOR_ELT:
- return lowerExtractInsertVectorElt(MI);
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return lowerExtractInsertVectorElt(MI);
case G_SHUFFLE_VECTOR:
return lowerShuffleVector(MI);
case G_DYN_STACKALLOC:
@@ -3100,123 +3100,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_READ_REGISTER:
case G_WRITE_REGISTER:
return lowerReadWriteRegister(MI);
- case G_UADDSAT:
- case G_USUBSAT: {
- // Try to make a reasonable guess about which lowering strategy to use. The
- // target can override this with custom lowering and calling the
- // implementation functions.
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (LI.isLegalOrCustom({G_UMIN, Ty}))
- return lowerAddSubSatToMinMax(MI);
- return lowerAddSubSatToAddoSubo(MI);
- }
- case G_SADDSAT:
- case G_SSUBSAT: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-
- // FIXME: It would probably make more sense to see if G_SADDO is preferred,
- // since it's a shorter expansion. However, we would need to figure out the
- // preferred boolean type for the carry out for the query.
- if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
- return lowerAddSubSatToMinMax(MI);
- return lowerAddSubSatToAddoSubo(MI);
- }
- case G_SSHLSAT:
- case G_USHLSAT:
- return lowerShlSat(MI);
- case G_ABS: {
- // Expand %res = G_ABS %a into:
- // %v1 = G_ASHR %a, scalar_size-1
- // %v2 = G_ADD %a, %v1
- // %res = G_XOR %v2, %v1
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- Register OpReg = MI.getOperand(1).getReg();
- auto ShiftAmt =
- MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
- auto Shift =
- MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
- auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
- MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
- MI.eraseFromParent();
- return Legalized;
- }
- case G_SELECT:
- return lowerSelect(MI);
- }
-}
-
-Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
- Align MinAlign) const {
- // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
- // datalayout for the preferred alignment. Also there should be a target hook
- // for this to allow targets to reduce the alignment and ignore the
- // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
- // the type.
- return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
-}
-
-MachineInstrBuilder
-LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
- MachinePointerInfo &PtrInfo) {
- MachineFunction &MF = MIRBuilder.getMF();
- const DataLayout &DL = MIRBuilder.getDataLayout();
- int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
-
- unsigned AddrSpace = DL.getAllocaAddrSpace();
- LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
-
- PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
- return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
-}
-
-static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
- LLT VecTy) {
- int64_t IdxVal;
- if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
- return IdxReg;
-
- LLT IdxTy = B.getMRI()->getType(IdxReg);
- unsigned NElts = VecTy.getNumElements();
- if (isPowerOf2_32(NElts)) {
- APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
- return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
- }
-
- return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
- .getReg(0);
-}
-
-Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
- Register Index) {
- LLT EltTy = VecTy.getElementType();
-
- // Calculate the element offset and add it to the pointer.
- unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
- assert(EltSize * 8 == EltTy.getSizeInBits() &&
- "Converting bits to bytes lost precision");
-
- Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
-
- LLT IdxTy = MRI.getType(Index);
- auto Mul = MIRBuilder.buildMul(IdxTy, Index,
- MIRBuilder.buildConstant(IdxTy, EltSize));
-
- LLT PtrTy = MRI.getType(VecPtr);
- return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
+ case G_UADDSAT:
+ case G_USUBSAT: {
+ // Try to make a reasonable guess about which lowering strategy to use. The
+ // target can override this with custom lowering and calling the
+ // implementation functions.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (LI.isLegalOrCustom({G_UMIN, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SADDSAT:
+ case G_SSUBSAT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // FIXME: It would probably make more sense to see if G_SADDO is preferred,
+ // since it's a shorter expansion. However, we would need to figure out the
+ // preferred boolean type for the carry out for the query.
+ if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SSHLSAT:
+ case G_USHLSAT:
+ return lowerShlSat(MI);
+ case G_ABS: {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_ASHR %a, scalar_size-1
+ // %v2 = G_ADD %a, %v1
+ // %res = G_XOR %v2, %v1
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register OpReg = MI.getOperand(1).getReg();
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
+ auto Shift =
+ MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
+ auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
+ MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_SELECT:
+ return lowerSelect(MI);
+ }
}
+Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
+ Align MinAlign) const {
+ // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
+ // datalayout for the preferred alignment. Also there should be a target hook
+ // for this to allow targets to reduce the alignment and ignore the
+ // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
+ // the type.
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); // the larger of MinAlign and PowerOf2Ceil of the type's size in bytes
+}
+
+MachineInstrBuilder
+LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo) { // PtrInfo is an out-parameter, overwritten below
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); // new stack object of the requested size and alignment
+
+ unsigned AddrSpace = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); // pointer type in the data layout's alloca address space
+
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); // describe the new slot for memory operands built by the caller
+ return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); // returns the builder for the frame-index instruction
+}
+
+static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, // returns an index register limited to VecTy's element range
+ LLT VecTy) {
+ int64_t IdxVal;
+ if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
+ return IdxReg; // constant indices are returned unchanged; no range check is applied to them here
+
+ LLT IdxTy = B.getMRI()->getType(IdxReg);
+ unsigned NElts = VecTy.getNumElements();
+ if (isPowerOf2_32(NElts)) { // power-of-2 element count: keep only the low Log2_32(NElts) bits of the index
+ APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
+ return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
+ }
+
+ return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) // otherwise clamp with an unsigned min against the last valid index
+ .getReg(0);
+}
+
+Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, // returns VecPtr advanced to the element selected by Index
+ Register Index) {
+ LLT EltTy = VecTy.getElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltTy.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); // limit non-constant indices to the vector's element range before forming the address
+
+ LLT IdxTy = MRI.getType(Index);
+ auto Mul = MIRBuilder.buildMul(IdxTy, Index, // byte offset = index * element size in bytes
+ MIRBuilder.buildConstant(IdxTy, EltSize));
+
+ LLT PtrTy = MRI.getType(VecPtr);
+ return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LCMTy = getLCMType(DstTy, NarrowTy);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT LCMTy = getLCMType(DstTy, NarrowTy); // presumably a type that both DstTy and NarrowTy divide evenly - confirm against getLCMType
- unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); // number of NarrowTy pieces needed to fill LCMTy
- auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
- SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
+ SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); // every piece reuses the same undef register
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
MI.eraseFromParent(); // the original instruction is erased once the replacement has been built
return Legalized;
}
@@ -3337,7 +3337,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
return UnableToLegalize;
- NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
+ NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
} else {
NumParts = DstTy.getNumElements();
NarrowTy1 = SrcTy.getElementType();
@@ -3610,116 +3610,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
return Legalized;
}
-// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
-// a vector
-//
-// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
-// undef as necessary.
-//
-// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
-// -> <2 x s16>
-//
-// %4:_(s16) = G_IMPLICIT_DEF
-// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
-// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
-// %7:_(<2 x s16>) = G_IMPLICIT_DEF
-// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
-// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
+// Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
+// produces a vector.
+//
+// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
+// undef as necessary.
+//
+// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+// -> <2 x s16>
+//
+// %4:_(s16) = G_IMPLICIT_DEF
+// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+// %7:_(<2 x s16>) = G_IMPLICIT_DEF
+// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
+// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
+LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
-
- // Break into a common type
- SmallVector<Register, 16> Parts;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
-
- // Build the requested new merge, padding with undef.
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
- TargetOpcode::G_ANYEXT);
-
- // Pack into the original result register.
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowVecTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register InsertVal;
- bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
-
- assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
- if (IsInsert)
- InsertVal = MI.getOperand(2).getReg();
-
- Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
-
- // TODO: Handle total scalarization case.
- if (!NarrowVecTy.isVector())
- return UnableToLegalize;
-
- LLT VecTy = MRI.getType(SrcVec);
-
- // If the index is a constant, we can really break this down as you would
- // expect, and index into the target size pieces.
- int64_t IdxVal;
- if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
- // Avoid out of bounds indexing the pieces.
- if (IdxVal >= VecTy.getNumElements()) {
- MIRBuilder.buildUndef(DstReg);
- MI.eraseFromParent();
- return Legalized;
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+
+ // Break into a common type
+ SmallVector<Register, 16> Parts;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+
+ // Build the requested new merge, padding with undef.
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
+ TargetOpcode::G_ANYEXT);
+
+ // Pack into the original result register.
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowVecTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+ assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+ if (IsInsert)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ // TODO: Handle total scalarization case.
+ if (!NarrowVecTy.isVector())
+ return UnableToLegalize;
+
+ LLT VecTy = MRI.getType(SrcVec);
+
+ // If the index is a constant, we can really break this down as you would
+ // expect, and index into the target size pieces.
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ // Avoid out of bounds indexing the pieces.
+ if (IdxVal >= VecTy.getNumElements()) {
+ MIRBuilder.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return Legalized;
}
- SmallVector<Register, 8> VecParts;
- LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
-
- // Build a sequence of NarrowTy pieces in VecParts for this operand.
- LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
- TargetOpcode::G_ANYEXT);
-
- unsigned NewNumElts = NarrowVecTy.getNumElements();
-
- LLT IdxTy = MRI.getType(Idx);
- int64_t PartIdx = IdxVal / NewNumElts;
- auto NewIdx =
- MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
-
- if (IsInsert) {
- LLT PartTy = MRI.getType(VecParts[PartIdx]);
-
- // Use the adjusted index to insert into one of the subvectors.
- auto InsertPart = MIRBuilder.buildInsertVectorElement(
- PartTy, VecParts[PartIdx], InsertVal, NewIdx);
- VecParts[PartIdx] = InsertPart.getReg(0);
-
- // Recombine the inserted subvector with the others to reform the result
- // vector.
- buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
- } else {
- MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
- }
-
- MI.eraseFromParent();
- return Legalized;
- }
-
- // With a variable index, we can't perform the operation in a smaller type, so
- // we're forced to expand this.
- //
- // TODO: We could emit a chain of compare/select to figure out which piece to
- // index.
- return lowerExtractInsertVectorElt(MI);
+ SmallVector<Register, 8> VecParts;
+ LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+
+ // Build a sequence of NarrowTy pieces in VecParts for this operand.
+ LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+ TargetOpcode::G_ANYEXT);
+
+ unsigned NewNumElts = NarrowVecTy.getNumElements();
+
+ LLT IdxTy = MRI.getType(Idx);
+ int64_t PartIdx = IdxVal / NewNumElts;
+ auto NewIdx =
+ MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+
+ if (IsInsert) {
+ LLT PartTy = MRI.getType(VecParts[PartIdx]);
+
+ // Use the adjusted index to insert into one of the subvectors.
+ auto InsertPart = MIRBuilder.buildInsertVectorElement(
+ PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+ VecParts[PartIdx] = InsertPart.getReg(0);
+
+ // Recombine the inserted subvector with the others to reform the result
+ // vector.
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+ } else {
+ MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // With a variable index, we can't perform the operation in a smaller type, so
+ // we're forced to expand this.
+ //
+ // TODO: We could emit a chain of compare/select to figure out which piece to
+ // index.
+ return lowerExtractInsertVectorElt(MI);
}
LegalizerHelper::LegalizeResult
@@ -3765,8 +3765,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
if (NumParts == -1)
return UnableToLegalize;
- LLT PtrTy = MRI.getType(AddrReg);
- const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+ LLT PtrTy = MRI.getType(AddrReg);
+ const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
unsigned TotalSize = ValTy.getSizeInBits();
@@ -3964,7 +3964,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ADD:
case G_SUB:
case G_MUL:
- case G_PTR_ADD:
+ case G_PTR_ADD:
case G_SMULH:
case G_UMULH:
case G_FADD:
@@ -3988,7 +3988,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FFLOOR:
case G_FRINT:
case G_INTRINSIC_ROUND:
- case G_INTRINSIC_ROUNDEVEN:
+ case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
@@ -4020,8 +4020,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SHL:
case G_LSHR:
case G_ASHR:
- case G_SSHLSAT:
- case G_USHLSAT:
+ case G_SSHLSAT:
+ case G_USHLSAT:
case G_CTLZ:
case G_CTLZ_ZERO_UNDEF:
case G_CTTZ:
@@ -4052,15 +4052,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
- assert(TypeIdx == 0 && "not a vector type index");
- return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
- case G_CONCAT_VECTORS:
- if (TypeIdx != 1) // TODO: This probably does work as expected already.
- return UnableToLegalize;
- return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
- case G_EXTRACT_VECTOR_ELT:
- case G_INSERT_VECTOR_ELT:
- return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
+ assert(TypeIdx == 0 && "not a vector type index");
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_CONCAT_VECTORS:
+ if (TypeIdx != 1) // TODO: This probably does work as expected already.
+ return UnableToLegalize;
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -4484,31 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
-
- Register Src = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src);
-
- // If all finite floats fit into the narrowed integer type, we can just swap
- // out the result type. This is practically only useful for conversions from
- // half to at least 16-bits, so just handle the one case.
- if (SrcTy.getScalarType() != LLT::scalar(16) ||
- NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
- return UnableToLegalize;
-
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0,
- IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
+                                   LLT NarrowTy) {
+  // Only narrowing of type index 0 is handled here.
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  // G_FPTOSI is the signed form; every other opcode is treated as unsigned.
+  const bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
+  const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+
+  // If all finite floats fit into the narrowed integer type, we can just swap
+  // out the result type. This is practically only useful for conversions from
+  // half to at least 16-bits, so just handle the one case. A signed result
+  // needs one extra bit, hence 17 rather than 16.
+  const unsigned MinResultBits = IsSigned ? 17 : 16;
+  const bool SrcIs16Bit = SrcTy.getScalarType() == LLT::scalar(16);
+  if (!SrcIs16Bit || NarrowTy.getScalarSizeInBits() < MinResultBits)
+    return UnableToLegalize;
+
+  // Retype the result in place, using the extension that matches signedness.
+  const unsigned ExtOpc =
+      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+  Observer.changingInstr(MI);
+  narrowScalarDst(MI, NarrowTy, 0, ExtOpc);
+  Observer.changedInstr(MI);
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 1)
@@ -4857,9 +4857,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitCount(MachineInstr &MI) {
+LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
- const auto &TII = MIRBuilder.getTII();
+ const auto &TII = MIRBuilder.getTII();
auto isSupported = [this](const LegalityQuery &Q) {
auto QAction = LI.getAction(Q).Action;
return QAction == Legal || QAction == Libcall || QAction == Custom;
@@ -4947,15 +4947,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
- auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
- auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
+ auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
auto MIBTmp = MIRBuilder.buildAnd(
- SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
- if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
- isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
- auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
+ SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
+ if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
- MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
+ MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
MI.eraseFromParent();
return Legalized;
}
@@ -4964,8 +4964,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTPOP: {
- Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(SrcReg);
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;
@@ -4975,11 +4975,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
// B2Count = val - { (val >> 1) & 0x55555555 }
// since it gives same result in blocks of 2 with one instruction less.
auto C_1 = B.buildConstant(Ty, 1);
- auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
+ auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
- auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
+ auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
// In order to get count in blocks of 4 add values from adjacent block of 2.
// B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
@@ -5078,7 +5078,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -5106,7 +5106,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -5152,7 +5152,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -5369,7 +5369,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
+LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -5384,20 +5384,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
return UnableToLegalize;
}
-// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
-// multiplication tree.
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Dst);
-
- auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
- MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
- MI.eraseFromParent();
- return Legalized;
-}
-
+// Lower the instruction by converting its integer operand (operand 2) to the
+// result type with a signed int-to-fp conversion and then building an FPow of
+// operand 1 and the converted value, carrying over the original flags.
+//
+// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
+// multiplication tree.
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
+  Register Result = MI.getOperand(0).getReg();
+  Register Base = MI.getOperand(1).getReg();
+  Register IntExp = MI.getOperand(2).getReg();
+
+  // The exponent is converted to the type of the result register.
+  auto FloatExp = MIRBuilder.buildSITOFP(MRI.getType(Result), IntExp);
+  MIRBuilder.buildFPow(Result, Base, FloatExp, MI.getFlags());
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -5413,7 +5413,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
}
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -5429,7 +5429,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
+LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -5651,72 +5651,72 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
return Legalized;
}
-/// Lower a vector extract or insert by writing the vector to a stack temporary
-/// and reloading the element or vector.
-///
-/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
-/// =>
-/// %stack_temp = G_FRAME_INDEX
-/// G_STORE %vec, %stack_temp
-/// %idx = clamp(%idx, %vec.getNumElements())
-/// %element_ptr = G_PTR_ADD %stack_temp, %idx
-/// %dst = G_LOAD %element_ptr
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register InsertVal;
- if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
- InsertVal = MI.getOperand(2).getReg();
-
- Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
-
- LLT VecTy = MRI.getType(SrcVec);
- LLT EltTy = VecTy.getElementType();
- if (!EltTy.isByteSized()) { // Not implemented.
- LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
- return UnableToLegalize;
- }
-
- unsigned EltBytes = EltTy.getSizeInBytes();
- Align VecAlign = getStackTemporaryAlignment(VecTy);
- Align EltAlign;
-
- MachinePointerInfo PtrInfo;
- auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
- VecAlign, PtrInfo);
- MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
-
- // Get the pointer to the element, and be sure not to hit undefined behavior
- // if the index is out of bounds.
- Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
-
- int64_t IdxVal;
- if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
- int64_t Offset = IdxVal * EltBytes;
- PtrInfo = PtrInfo.getWithOffset(Offset);
- EltAlign = commonAlignment(VecAlign, Offset);
- } else {
- // We lose information with a variable offset.
- EltAlign = getStackTemporaryAlignment(EltTy);
- PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
- }
-
- if (InsertVal) {
- // Write the inserted element
- MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
-
- // Reload the whole vector.
- MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
- } else {
- MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
- }
-
- MI.eraseFromParent();
- return Legalized;
-}
-
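+/// Lower a vector extract or insert by writing the vector to a stack temporary
+/// and reloading the element or vector.
+///
+/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
+/// =>
+/// %stack_temp = G_FRAME_INDEX
+/// G_STORE %vec, %stack_temp
+/// %idx = clamp(%idx, %vec.getNumElements())
+/// %offset = G_MUL %idx, <element size in bytes>
+/// %element_ptr = G_PTR_ADD %stack_temp, %offset
+/// %dst = G_LOAD %element_ptr
+///
+/// For G_INSERT_VECTOR_ELT the new element is instead stored to %element_ptr
+/// and the whole vector is then reloaded from %stack_temp into %dst.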
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcVec = MI.getOperand(1).getReg();
+  // InsertVal is only assigned for G_INSERT_VECTOR_ELT; it is left
+  // default-constructed otherwise and is tested below to pick the insert or
+  // extract path.
+  Register InsertVal;
+  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+    InsertVal = MI.getOperand(2).getReg();
+
+  // The index is read from the last operand.
+  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+  LLT VecTy = MRI.getType(SrcVec);
+  LLT EltTy = VecTy.getElementType();
+  if (!EltTy.isByteSized()) { // Not implemented.
+    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
+    return UnableToLegalize;
+  }
+
+  unsigned EltBytes = EltTy.getSizeInBytes();
+  Align VecAlign = getStackTemporaryAlignment(VecTy);
+  Align EltAlign;
+
+  // PtrInfo is handed to createStackTemporary and describes accesses to the
+  // whole vector in the stack temporary. It must not be modified afterwards:
+  // it is needed again for the whole-vector reload on the insert path.
+  MachinePointerInfo PtrInfo;
+  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
+                                        VecAlign, PtrInfo);
+  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
+
+  // Get the pointer to the element, and be sure not to hit undefined behavior
+  // if the index is out of bounds.
+  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
+
+  // Pointer info and alignment for the single-element access, kept separate
+  // from the whole-vector PtrInfo above.
+  MachinePointerInfo EltPtrInfo;
+  int64_t IdxVal;
+  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+    // Constant index: the element sits at a known byte offset in the slot.
+    int64_t Offset = IdxVal * EltBytes;
+    EltPtrInfo = PtrInfo.getWithOffset(Offset);
+    EltAlign = commonAlignment(VecAlign, Offset);
+  } else {
+    // We lose information with a variable offset.
+    EltAlign = getStackTemporaryAlignment(EltTy);
+    EltPtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
+  }
+
+  if (InsertVal) {
+    // Write the inserted element
+    MIRBuilder.buildStore(InsertVal, EltPtr, EltPtrInfo, EltAlign);
+
+    // Reload the whole vector. This access starts at the base of the stack
+    // temporary, so it uses the whole-vector pointer info rather than the
+    // element's.
+    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
+  } else {
+    MIRBuilder.buildLoad(DstReg, EltPtr, EltPtrInfo, EltAlign);
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
Register Src0Reg = MI.getOperand(1).getReg();
@@ -5931,185 +5931,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- bool IsSigned;
- bool IsAdd;
- unsigned BaseOp;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("unexpected addsat/subsat opcode");
- case TargetOpcode::G_UADDSAT:
- IsSigned = false;
- IsAdd = true;
- BaseOp = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_SADDSAT:
- IsSigned = true;
- IsAdd = true;
- BaseOp = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_USUBSAT:
- IsSigned = false;
- IsAdd = false;
- BaseOp = TargetOpcode::G_SUB;
- break;
- case TargetOpcode::G_SSUBSAT:
- IsSigned = true;
- IsAdd = false;
- BaseOp = TargetOpcode::G_SUB;
- break;
- }
-
- if (IsSigned) {
- // sadd.sat(a, b) ->
- // hi = 0x7fffffff - smax(a, 0)
- // lo = 0x80000000 - smin(a, 0)
- // a + smin(smax(lo, b), hi)
- // ssub.sat(a, b) ->
- // lo = smax(a, -1) - 0x7fffffff
- // hi = smin(a, -1) - 0x80000000
- // a - smin(smax(lo, b), hi)
- // TODO: AMDGPU can use a "median of 3" instruction here:
- // a +/- med3(lo, b, hi)
- uint64_t NumBits = Ty.getScalarSizeInBits();
- auto MaxVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
- auto MinVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
- MachineInstrBuilder Hi, Lo;
- if (IsAdd) {
- auto Zero = MIRBuilder.buildConstant(Ty, 0);
- Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
- Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
- } else {
- auto NegOne = MIRBuilder.buildConstant(Ty, -1);
- Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
- MaxVal);
- Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
- MinVal);
- }
- auto RHSClamped =
- MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
- MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
- } else {
- // uadd.sat(a, b) -> a + umin(~a, b)
- // usub.sat(a, b) -> a - umin(a, b)
- Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
- auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
- MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
- }
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- LLT BoolTy = Ty.changeElementSize(1);
- bool IsSigned;
- bool IsAdd;
- unsigned OverflowOp;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("unexpected addsat/subsat opcode");
- case TargetOpcode::G_UADDSAT:
- IsSigned = false;
- IsAdd = true;
- OverflowOp = TargetOpcode::G_UADDO;
- break;
- case TargetOpcode::G_SADDSAT:
- IsSigned = true;
- IsAdd = true;
- OverflowOp = TargetOpcode::G_SADDO;
- break;
- case TargetOpcode::G_USUBSAT:
- IsSigned = false;
- IsAdd = false;
- OverflowOp = TargetOpcode::G_USUBO;
- break;
- case TargetOpcode::G_SSUBSAT:
- IsSigned = true;
- IsAdd = false;
- OverflowOp = TargetOpcode::G_SSUBO;
- break;
- }
-
- auto OverflowRes =
- MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
- Register Tmp = OverflowRes.getReg(0);
- Register Ov = OverflowRes.getReg(1);
- MachineInstrBuilder Clamp;
- if (IsSigned) {
- // sadd.sat(a, b) ->
- // {tmp, ov} = saddo(a, b)
- // ov ? (tmp >>s 31) + 0x80000000 : r
- // ssub.sat(a, b) ->
- // {tmp, ov} = ssubo(a, b)
- // ov ? (tmp >>s 31) + 0x80000000 : r
- uint64_t NumBits = Ty.getScalarSizeInBits();
- auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
- auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
- auto MinVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
- Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
- } else {
- // uadd.sat(a, b) ->
- // {tmp, ov} = uaddo(a, b)
- // ov ? 0xffffffff : tmp
- // usub.sat(a, b) ->
- // {tmp, ov} = usubo(a, b)
- // ov ? 0 : tmp
- Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
- }
- MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerShlSat(MachineInstr &MI) {
- assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
- MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
- "Expected shlsat opcode!");
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- LLT BoolTy = Ty.changeElementSize(1);
-
- unsigned BW = Ty.getScalarSizeInBits();
- auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
- auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
- : MIRBuilder.buildLShr(Ty, Result, RHS);
-
- MachineInstrBuilder SatVal;
- if (IsSigned) {
- auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
- auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
- auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
- MIRBuilder.buildConstant(Ty, 0));
- SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
- } else {
- SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
- }
- auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
- MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
+  // Expand a saturating add/sub into a plain G_ADD / G_SUB whose right-hand
+  // operand has first been clamped with min/max operations.
+  Register Dst = MI.getOperand(0).getReg();
+  Register Lhs = MI.getOperand(1).getReg();
+  Register Rhs = MI.getOperand(2).getReg();
+  const LLT Ty = MRI.getType(Dst);
+
+  // Reject anything that is not one of the four saturating opcodes, then
+  // derive signedness and direction from the opcode.
+  const unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case TargetOpcode::G_UADDSAT:
+  case TargetOpcode::G_SADDSAT:
+  case TargetOpcode::G_USUBSAT:
+  case TargetOpcode::G_SSUBSAT:
+    break;
+  default:
+    llvm_unreachable("unexpected addsat/subsat opcode");
+  }
+  const bool IsSigned =
+      Opc == TargetOpcode::G_SADDSAT || Opc == TargetOpcode::G_SSUBSAT;
+  const bool IsAdd =
+      Opc == TargetOpcode::G_UADDSAT || Opc == TargetOpcode::G_SADDSAT;
+  const unsigned BaseOp = IsAdd ? TargetOpcode::G_ADD : TargetOpcode::G_SUB;
+
+  if (!IsSigned) {
+    // uadd.sat(a, b) -> a + umin(~a, b)
+    // usub.sat(a, b) -> a - umin(a, b)
+    Register Limit = Lhs;
+    if (IsAdd)
+      Limit = MIRBuilder.buildNot(Ty, Lhs).getReg(0);
+    auto RhsClamped = MIRBuilder.buildUMin(Ty, Limit, Rhs);
+    MIRBuilder.buildInstr(BaseOp, {Dst}, {Lhs, RhsClamped});
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // sadd.sat(a, b) ->
+  //   hi = 0x7fffffff - smax(a, 0)
+  //   lo = 0x80000000 - smin(a, 0)
+  //   a + smin(smax(lo, b), hi)
+  // ssub.sat(a, b) ->
+  //   lo = smax(a, -1) - 0x7fffffff
+  //   hi = smin(a, -1) - 0x80000000
+  //   a - smin(smax(lo, b), hi)
+  // TODO: AMDGPU can use a "median of 3" instruction here:
+  //   a +/- med3(lo, b, hi)
+  const uint64_t NumBits = Ty.getScalarSizeInBits();
+  auto MaxVal =
+      MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
+  auto MinVal =
+      MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+
+  MachineInstrBuilder Lo, Hi;
+  if (IsAdd) {
+    auto Zero = MIRBuilder.buildConstant(Ty, 0);
+    auto NonNegPart = MIRBuilder.buildSMax(Ty, Lhs, Zero);
+    Hi = MIRBuilder.buildSub(Ty, MaxVal, NonNegPart);
+    auto NonPosPart = MIRBuilder.buildSMin(Ty, Lhs, Zero);
+    Lo = MIRBuilder.buildSub(Ty, MinVal, NonPosPart);
+  } else {
+    auto NegOne = MIRBuilder.buildConstant(Ty, -1);
+    auto UpperPart = MIRBuilder.buildSMax(Ty, Lhs, NegOne);
+    Lo = MIRBuilder.buildSub(Ty, UpperPart, MaxVal);
+    auto LowerPart = MIRBuilder.buildSMin(Ty, Lhs, NegOne);
+    Hi = MIRBuilder.buildSub(Ty, LowerPart, MinVal);
+  }
+
+  // Clamp b into [lo, hi] and apply the base operation.
+  auto AtLeastLo = MIRBuilder.buildSMax(Ty, Lo, Rhs);
+  auto RhsClamped = MIRBuilder.buildSMin(Ty, AtLeastLo, Hi);
+  MIRBuilder.buildInstr(BaseOp, {Dst}, {Lhs, RhsClamped});
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+/// Expand a saturating add/sub into the matching overflow-reporting add/sub
+/// followed by a select: when the overflow bit is set the result is replaced
+/// by a saturation value, otherwise the plain result is used.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Lhs = MI.getOperand(1).getReg();
+  Register Rhs = MI.getOperand(2).getReg();
+  const LLT Ty = MRI.getType(Dst);
+  // Type of the overflow flag: same shape as Ty with 1-bit elements.
+  const LLT BoolTy = Ty.changeElementSize(1);
+
+  // Reject anything that is not one of the four saturating opcodes, then
+  // derive signedness, direction and the overflow opcode from it.
+  const unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case TargetOpcode::G_UADDSAT:
+  case TargetOpcode::G_SADDSAT:
+  case TargetOpcode::G_USUBSAT:
+  case TargetOpcode::G_SSUBSAT:
+    break;
+  default:
+    llvm_unreachable("unexpected addsat/subsat opcode");
+  }
+  const bool IsSigned =
+      Opc == TargetOpcode::G_SADDSAT || Opc == TargetOpcode::G_SSUBSAT;
+  const bool IsAdd =
+      Opc == TargetOpcode::G_UADDSAT || Opc == TargetOpcode::G_SADDSAT;
+  const unsigned OverflowOp =
+      IsSigned ? (IsAdd ? TargetOpcode::G_SADDO : TargetOpcode::G_SSUBO)
+               : (IsAdd ? TargetOpcode::G_UADDO : TargetOpcode::G_USUBO);
+
+  auto OverflowRes =
+      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {Lhs, Rhs});
+  Register Tmp = OverflowRes.getReg(0);
+  Register Ov = OverflowRes.getReg(1);
+
+  MachineInstrBuilder Clamp;
+  if (!IsSigned) {
+    // uadd.sat(a, b) ->
+    //   {tmp, ov} = uaddo(a, b)
+    //   ov ? 0xffffffff : tmp
+    // usub.sat(a, b) ->
+    //   {tmp, ov} = usubo(a, b)
+    //   ov ? 0 : tmp
+    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
+  } else {
+    // sadd.sat(a, b) ->
+    //   {tmp, ov} = saddo(a, b)
+    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
+    // ssub.sat(a, b) ->
+    //   {tmp, ov} = ssubo(a, b)
+    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
+    const uint64_t NumBits = Ty.getScalarSizeInBits();
+    auto SignBitIdx = MIRBuilder.buildConstant(Ty, NumBits - 1);
+    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, SignBitIdx);
+    auto MinVal =
+        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
+  }
+  MIRBuilder.buildSelect(Dst, Ov, Clamp, Tmp);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+/// Expand a saturating left shift: perform the plain shift, shift the result
+/// back, and compare with the original operand. If they differ, a saturation
+/// value is selected instead of the shifted result.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShlSat(MachineInstr &MI) {
+  const unsigned Opc = MI.getOpcode();
+  assert((Opc == TargetOpcode::G_SSHLSAT || Opc == TargetOpcode::G_USHLSAT) &&
+         "Expected shlsat opcode!");
+  const bool IsSigned = Opc == TargetOpcode::G_SSHLSAT;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Lhs = MI.getOperand(1).getReg();
+  Register Amt = MI.getOperand(2).getReg();
+  const LLT Ty = MRI.getType(Dst);
+  // Type of the comparison results: same shape as Ty with 1-bit elements.
+  const LLT BoolTy = Ty.changeElementSize(1);
+  const unsigned BW = Ty.getScalarSizeInBits();
+
+  auto Shifted = MIRBuilder.buildShl(Ty, Lhs, Amt);
+
+  // ShiftedBack undoes the shift (arithmetic for signed, logical for
+  // unsigned); SatVal is the value used when the round trip loses bits.
+  MachineInstrBuilder ShiftedBack, SatVal;
+  if (IsSigned) {
+    ShiftedBack = MIRBuilder.buildAShr(Ty, Shifted, Amt);
+
+    // Saturate towards the signed minimum for a negative operand and towards
+    // the signed maximum otherwise.
+    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
+    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
+    auto Zero = MIRBuilder.buildConstant(Ty, 0);
+    auto IsNeg = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Lhs, Zero);
+    SatVal = MIRBuilder.buildSelect(Ty, IsNeg, SatMin, SatMax);
+  } else {
+    ShiftedBack = MIRBuilder.buildLShr(Ty, Shifted, Amt);
+    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
+  }
+
+  auto Overflowed =
+      MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, Lhs, ShiftedBack);
+  MIRBuilder.buildSelect(Dst, Overflowed, SatVal, Shifted);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -6199,7 +6199,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
const MDString *RegStr = cast<MDString>(
cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
- Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
+ Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
if (!PhysReg.isValid())
return UnableToLegalize;
@@ -6211,63 +6211,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
- unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
- Register Result = MI.getOperand(0).getReg();
- LLT OrigTy = MRI.getType(Result);
- auto SizeInBits = OrigTy.getScalarSizeInBits();
- LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
-
- auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
- auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
- auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
- unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
-
- auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
- auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
- MIRBuilder.buildTrunc(Result, Shifted);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
- // Implement vector G_SELECT in terms of XOR, AND, OR.
- Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
- Register Op1Reg = MI.getOperand(2).getReg();
- Register Op2Reg = MI.getOperand(3).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT MaskTy = MRI.getType(MaskReg);
- LLT Op1Ty = MRI.getType(Op1Reg);
- if (!DstTy.isVector())
- return UnableToLegalize;
-
- // Vector selects can have a scalar predicate. If so, splat into a vector and
- // finish for later legalization attempts to try again.
- if (MaskTy.isScalar()) {
- Register MaskElt = MaskReg;
- if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
- MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
- // Generate a vector splat idiom to be pattern matched later.
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(ShufSplat.getReg(0));
- Observer.changedInstr(MI);
- return Legalized;
- }
-
- if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
- return UnableToLegalize;
- }
-
- auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
- auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
- auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
- MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
- MI.eraseFromParent();
- return Legalized;
-}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ Register Result = MI.getOperand(0).getReg();
+ LLT OrigTy = MRI.getType(Result);
+ auto SizeInBits = OrigTy.getScalarSizeInBits();
+ LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
+
+ auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
+ auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
+ auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
+ unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
+ auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
+ MIRBuilder.buildTrunc(Result, Shifted);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
+ // Implement vector G_SELECT in terms of XOR, AND, OR.
+ Register DstReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+ Register Op1Reg = MI.getOperand(2).getReg();
+ Register Op2Reg = MI.getOperand(3).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT MaskTy = MRI.getType(MaskReg);
+ LLT Op1Ty = MRI.getType(Op1Reg);
+ if (!DstTy.isVector())
+ return UnableToLegalize;
+
+ // Vector selects can have a scalar predicate. If so, splat into a vector and
+ // finish for later legalization attempts to try again.
+ if (MaskTy.isScalar()) {
+ Register MaskElt = MaskReg;
+ if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+ MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+ // Generate a vector splat idiom to be pattern matched later.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+ return UnableToLegalize;
+ }
+
+ auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
+ auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
+ auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
+ MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ MI.eraseFromParent();
+ return Legalized;
+}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 30acac14bc..0a5cb26325 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -105,7 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
const std::pair<unsigned, LLT> &Mutation) {
switch (Rule.getAction()) {
- case Legal:
+ case Legal:
case Custom:
case Lower:
case MoreElements:
@@ -123,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule,
std::pair<unsigned, LLT> Mutation) {
// If the user wants a custom mutation, then we can't really say much about
// it. Return true, and trust that they're doing the right thing.
- if (Rule.getAction() == Custom || Rule.getAction() == Legal)
+ if (Rule.getAction() == Custom || Rule.getAction() == Legal)
return true;
const unsigned TypeIdx = Mutation.first;
@@ -148,8 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule,
if (NewTy.getNumElements() <= OldElts)
return false;
}
- } else if (Rule.getAction() == MoreElements)
- return false;
+ } else if (Rule.getAction() == MoreElements)
+ return false;
// Make sure the element type didn't change.
return NewTy.getScalarType() == OldTy.getScalarType();
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp
index 30c00c63f6..66cff18e91 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -11,7 +11,7 @@
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -57,20 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
-bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
- MachineInstr *MI = Op.getParent();
- if (!MI->isPHI())
- return false;
-
- Register SrcReg = Op.getReg();
- for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
- auto &MO = MI->getOperand(Idx);
- if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
- return true;
- }
- return false;
-}
-
+bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
+ MachineInstr *MI = Op.getParent();
+ if (!MI->isPHI())
+ return false;
+
+ Register SrcReg = Op.getReg();
+ for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
+ auto &MO = MI->getOperand(Idx);
+ if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
+ return true;
+ }
+ return false;
+}
+
bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedSetVecT &LocalizedInstrs) {
bool Changed = false;
@@ -108,14 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedInstrs.insert(&MI);
continue;
}
-
- // If the use is a phi operand that's not unique, don't try to localize.
- // If we do, we can cause unnecessary instruction bloat by duplicating
- // into each predecessor block, when the existing one is sufficient and
- // allows for easier optimization later.
- if (isNonUniquePhiValue(MOUse))
- continue;
-
+
+ // If the use is a phi operand that's not unique, don't try to localize.
+ // If we do, we can cause unnecessary instruction bloat by duplicating
+ // into each predecessor block, when the existing one is sufficient and
+ // allows for easier optimization later.
+ if (isNonUniquePhiValue(MOUse))
+ continue;
+
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 67ef02a4e7..549bb1a13c 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -9,7 +9,7 @@
/// This file implements the MachineIRBuidler class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
} else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
MIB.addFPImm(CFP);
} else {
- // Insert $noreg if we didn't find a usable constant and had to drop it.
- MIB.addReg(Register());
+ // Insert $noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(Register());
}
MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
@@ -162,11 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
.addJumpTableIndex(JTI);
}
-void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) {
- assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
- assert((Res == Op0) && "type mismatch");
-}
-
+void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0,
const LLT Op1) {
assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
@@ -317,29 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
+MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
MachineBasicBlock &Dest) {
- assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
+ assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- auto MIB = buildInstr(TargetOpcode::G_BRCOND);
- Tst.addSrcToMIB(MIB);
- MIB.addMBB(&Dest);
- return MIB;
+ auto MIB = buildInstr(TargetOpcode::G_BRCOND);
+ Tst.addSrcToMIB(MIB);
+ MIB.addMBB(&Dest);
+ return MIB;
}
-MachineInstrBuilder
-MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
- MachinePointerInfo PtrInfo, Align Alignment,
- MachineMemOperand::Flags MMOFlags,
- const AAMDNodes &AAInfo) {
- MMOFlags |= MachineMemOperand::MOLoad;
- assert((MMOFlags & MachineMemOperand::MOStore) == 0);
-
- uint64_t Size = MemoryLocation::getSizeOrUnknown(
- TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes()));
- MachineMemOperand *MMO =
- getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
- return buildLoad(Dst, Addr, *MMO);
+MachineInstrBuilder
+MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildLoad(Dst, Addr, *MMO);
}
MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
@@ -386,21 +386,21 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
return MIB;
}
-MachineInstrBuilder
-MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
- MachinePointerInfo PtrInfo, Align Alignment,
- MachineMemOperand::Flags MMOFlags,
- const AAMDNodes &AAInfo) {
- MMOFlags |= MachineMemOperand::MOStore;
- assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
-
- uint64_t Size = MemoryLocation::getSizeOrUnknown(
- TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes()));
- MachineMemOperand *MMO =
- getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
- return buildStore(Val, Addr, *MMO);
-}
-
+MachineInstrBuilder
+MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildStore(Val, Addr, *MMO);
+}
+
MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
@@ -635,35 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
- const SrcOp &Src) {
- LLT DstTy = Res.getLLTTy(*getMRI());
- assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() &&
- "Expected Src to match Dst elt ty");
- auto UndefVec = buildUndef(DstTy);
- auto Zero = buildConstant(LLT::scalar(64), 0);
- auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
- SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
- return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
- const SrcOp &Src1,
- const SrcOp &Src2,
- ArrayRef<int> Mask) {
- LLT DstTy = Res.getLLTTy(*getMRI());
- LLT Src1Ty = Src1.getLLTTy(*getMRI());
- LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
- assert(DstTy.getElementType() == Src1Ty.getElementType() &&
- DstTy.getElementType() == Src2Ty.getElementType());
- (void)Src1Ty;
- (void)Src2Ty;
- ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
- return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
- .addShuffleMask(MaskAlloc);
-}
-
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+ const SrcOp &Src) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() &&
+ "Expected Src to match Dst elt ty");
+ auto UndefVec = buildUndef(DstTy);
+ auto Zero = buildConstant(LLT::scalar(64), 0);
+ auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+ SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+ return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+ const SrcOp &Src1,
+ const SrcOp &Src2,
+ ArrayRef<int> Mask) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ LLT Src1Ty = Src1.getLLTTy(*getMRI());
+ LLT Src2Ty = Src2.getLLTTy(*getMRI());
+ assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+ DstTy.getElementType() == Src2Ty.getElementType());
+ (void)Src1Ty;
+ (void)Src2Ty;
+ ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+ return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+ .addShuffleMask(MaskAlloc);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
@@ -986,14 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
break;
}
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_ABS:
- // All these are unary ops.
- assert(DstOps.size() == 1 && "Invalid Dst");
- assert(SrcOps.size() == 1 && "Invalid Srcs");
- validateUnaryOp(DstOps[0].getLLTTy(*getMRI()),
- SrcOps[0].getLLTTy(*getMRI()));
- break;
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_ABS:
+ // All these are unary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateUnaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()));
+ break;
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -1022,9 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_USHLSAT:
- case TargetOpcode::G_SSHLSAT: {
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_USHLSAT:
+ case TargetOpcode::G_SSHLSAT: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
@@ -1089,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_UNMERGE_VALUES: {
assert(!DstOps.empty() && "Invalid trivial sequence");
assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
- assert(llvm::all_of(DstOps,
- [&, this](const DstOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- DstOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(DstOps,
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in output list");
assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1103,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_MERGE_VALUES: {
assert(!SrcOps.empty() && "invalid trivial sequence");
assert(DstOps.size() == 1 && "Invalid Dst");
- assert(llvm::all_of(SrcOps,
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1154,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(llvm::all_of(SrcOps,
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1171,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(llvm::all_of(SrcOps,
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
@@ -1186,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert((!SrcOps.empty() || SrcOps.size() < 2) &&
"Must have at least 2 operands");
- assert(llvm::all_of(SrcOps,
- [&, this](const SrcOp &Op) {
- return (Op.getLLTTy(*getMRI()).isVector() &&
- Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI()));
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index e2a9637471..05f47915b3 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -421,7 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
- append_range(PossibleMappings, AltMappings);
+ append_range(PossibleMappings, AltMappings);
#ifndef NDEBUG
for (const InstructionMapping *Mapping : PossibleMappings)
assert(Mapping->verify(MI) && "Mapping is invalid");
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp
index cd24832244..2adc30eacc 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,11 +11,11 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -23,16 +23,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "globalisel-utils"
using namespace llvm;
-using namespace MIPatternMatch;
+using namespace MIPatternMatch;
Register llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
@@ -48,7 +48,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
- const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
+ const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -69,13 +69,13 @@ Register llvm::constrainOperandRegClass(
TII.get(TargetOpcode::COPY), Reg)
.addReg(ConstrainedReg);
}
- if (GISelChangeObserver *Observer = MF.getObserver()) {
- Observer->changingInstr(*RegMO.getParent());
- }
- RegMO.setReg(ConstrainedReg);
- if (GISelChangeObserver *Observer = MF.getObserver()) {
- Observer->changedInstr(*RegMO.getParent());
- }
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changingInstr(*RegMO.getParent());
+ }
+ RegMO.setReg(ConstrainedReg);
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changedInstr(*RegMO.getParent());
+ }
} else {
if (GISelChangeObserver *Observer = MF.getObserver()) {
if (!RegMO.isDef()) {
@@ -93,7 +93,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
- MachineOperand &RegMO, unsigned OpIdx) {
+ MachineOperand &RegMO, unsigned OpIdx) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -163,7 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
// If the operand is a vreg, we should constrain its regclass, and only
// insert COPYs if that's impossible.
// constrainOperandRegClass does that for us.
- constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI);
+ constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI);
// Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
// done.
@@ -192,14 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
bool llvm::isTriviallyDead(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- // FIXME: This logical is mostly duplicated with
- // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in
- // MachineInstr::isLabel?
-
- // Don't delete frame allocation labels.
- if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
- return false;
-
+ // FIXME: This logical is mostly duplicated with
+ // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in
+ // MachineInstr::isLabel?
+
+ // Don't delete frame allocation labels.
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
@@ -262,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<APInt> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
+Optional<APInt> llvm::getConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
Optional<ValueAndVReg> ValAndVReg =
getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
@@ -273,17 +273,17 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
-Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
- if (Val && Val->getBitWidth() <= 64)
- return Val->getSExtValue();
- return None;
-}
-
+Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+ if (Val && Val->getBitWidth() <= 64)
+ return Val->getSExtValue();
+ return None;
+}
+
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant, bool LookThroughAnyExt) {
+ bool HandleFConstant, bool LookThroughAnyExt) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -310,10 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
- case TargetOpcode::G_ANYEXT:
- if (!LookThroughAnyExt)
- return None;
- LLVM_FALLTHROUGH;
+ case TargetOpcode::G_ANYEXT:
+ if (!LookThroughAnyExt)
+ return None;
+ LLVM_FALLTHROUGH;
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
@@ -347,7 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
case TargetOpcode::G_TRUNC:
Val = Val.trunc(OpcodeAndSize.second);
break;
- case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
Val = Val.sext(OpcodeAndSize.second);
break;
@@ -357,10 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
}
}
- return ValueAndVReg{Val, VReg};
+ return ValueAndVReg{Val, VReg};
}
-const ConstantFP *
+const ConstantFP *
llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
@@ -368,8 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
return MI->getOperand(1).getFPImm();
}
-Optional<DefinitionAndSourceRegister>
-llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
+Optional<DefinitionAndSourceRegister>
+llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
@@ -378,7 +378,7 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
while (DefMI->getOpcode() == TargetOpcode::COPY) {
Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
- if (!SrcTy.isValid())
+ if (!SrcTy.isValid())
break;
DefMI = MRI.getVRegDef(SrcReg);
DefSrcReg = SrcReg;
@@ -386,8 +386,8 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
return DefinitionAndSourceRegister{DefMI, DefSrcReg};
}
-MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
return DefSrcReg ? DefSrcReg->MI : nullptr;
@@ -400,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg,
return DefSrcReg ? DefSrcReg->Reg : Register();
}
-MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
@@ -430,8 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
if (!MaybeOp1Cst)
return None;
- const APInt &C1 = *MaybeOp1Cst;
- const APInt &C2 = *MaybeOp2Cst;
+ const APInt &C1 = *MaybeOp1Cst;
+ const APInt &C2 = *MaybeOp2Cst;
switch (Opcode) {
default:
break;
@@ -480,8 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ const TargetMachine& TM = DefMI->getMF()->getTarget();
+ if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
if (SNaN) {
@@ -512,40 +512,40 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
return Align(1);
}
-Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
- const TargetInstrInfo &TII,
- MCRegister PhysReg,
- const TargetRegisterClass &RC,
- LLT RegTy) {
- DebugLoc DL; // FIXME: Is no location the right choice?
- MachineBasicBlock &EntryMBB = MF.front();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
- if (LiveIn) {
- MachineInstr *Def = MRI.getVRegDef(LiveIn);
- if (Def) {
- // FIXME: Should the verifier check this is in the entry block?
- assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
- return LiveIn;
- }
-
- // It's possible the incoming argument register and copy was added during
- // lowering, but later deleted due to being/becoming dead. If this happens,
- // re-insert the copy.
- } else {
- // The live in register was not present, so add it.
- LiveIn = MF.addLiveIn(PhysReg, &RC);
- if (RegTy.isValid())
- MRI.setType(LiveIn, RegTy);
- }
-
- BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
- .addReg(PhysReg);
- if (!EntryMBB.isLiveIn(PhysReg))
- EntryMBB.addLiveIn(PhysReg);
- return LiveIn;
-}
-
+Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ LLT RegTy) {
+ DebugLoc DL; // FIXME: Is no location the right choice?
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
+ if (LiveIn) {
+ MachineInstr *Def = MRI.getVRegDef(LiveIn);
+ if (Def) {
+ // FIXME: Should the verifier check this is in the entry block?
+ assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
+ return LiveIn;
+ }
+
+ // It's possible the incoming argument register and copy was added during
+ // lowering, but later deleted due to being/becoming dead. If this happens,
+ // re-insert the copy.
+ } else {
+ // The live in register was not present, so add it.
+ LiveIn = MF.addLiveIn(PhysReg, &RC);
+ if (RegTy.isValid())
+ MRI.setType(LiveIn, RegTy);
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
+ .addReg(PhysReg);
+ if (!EntryMBB.isLiveIn(PhysReg))
+ EntryMBB.addLiveIn(PhysReg);
+ return LiveIn;
+}
+
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
@@ -554,262 +554,262 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
switch (Opcode) {
default:
break;
- case TargetOpcode::G_SEXT_INREG: {
- LLT Ty = MRI.getType(Op1);
- return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits());
- }
+ case TargetOpcode::G_SEXT_INREG: {
+ LLT Ty = MRI.getType(Op1);
+ return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits());
}
+ }
}
return None;
}
-bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
- GISelKnownBits *KB) {
- Optional<DefinitionAndSourceRegister> DefSrcReg =
- getDefSrcRegIgnoringCopies(Reg, MRI);
- if (!DefSrcReg)
- return false;
-
- const MachineInstr &MI = *DefSrcReg->MI;
- const LLT Ty = MRI.getType(Reg);
-
- switch (MI.getOpcode()) {
- case TargetOpcode::G_CONSTANT: {
- unsigned BitWidth = Ty.getScalarSizeInBits();
- const ConstantInt *CI = MI.getOperand(1).getCImm();
- return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2();
- }
- case TargetOpcode::G_SHL: {
- // A left-shift of a constant one will have exactly one bit set because
- // shifting the bit off the end is undefined.
-
- // TODO: Constant splat
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
- if (*ConstLHS == 1)
- return true;
- }
-
- break;
- }
- case TargetOpcode::G_LSHR: {
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
- if (ConstLHS->isSignMask())
- return true;
- }
-
- break;
- }
- default:
- break;
- }
-
- // TODO: Are all operands of a build vector constant powers of two?
- if (!KB)
- return false;
-
- // More could be done here, though the above checks are enough
- // to handle some common cases.
-
- // Fall back to computeKnownBits to catch other known cases.
- KnownBits Known = KB->getKnownBits(Reg);
- return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
-}
-
+bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB) {
+ Optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ if (!DefSrcReg)
+ return false;
+
+ const MachineInstr &MI = *DefSrcReg->MI;
+ const LLT Ty = MRI.getType(Reg);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ const ConstantInt *CI = MI.getOperand(1).getCImm();
+ return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }
+ case TargetOpcode::G_SHL: {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+
+ // TODO: Constant splat
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (*ConstLHS == 1)
+ return true;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (ConstLHS->isSignMask())
+ return true;
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // TODO: Are all operands of a build vector constant powers of two?
+ if (!KB)
+ return false;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ KnownBits Known = KB->getKnownBits(Reg);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+}
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) {
- unsigned Mul = OrigSize * TargetSize;
- unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize);
- return Mul / GCDSize;
-}
-
-LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
- const unsigned OrigSize = OrigTy.getSizeInBits();
- const unsigned TargetSize = TargetTy.getSizeInBits();
-
- if (OrigSize == TargetSize)
- return OrigTy;
-
- if (OrigTy.isVector()) {
- const LLT OrigElt = OrigTy.getElementType();
-
- if (TargetTy.isVector()) {
- const LLT TargetElt = TargetTy.getElementType();
-
- if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
- // Prefer the original element type.
- int Mul = OrigTy.getNumElements() * TargetTy.getNumElements();
- return LLT::vector(Mul / GCDElts, OrigTy.getElementType());
- }
- } else {
- if (OrigElt.getSizeInBits() == TargetSize)
- return OrigTy;
- }
-
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
- return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
+static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) {
+ unsigned Mul = OrigSize * TargetSize;
+ unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize);
+ return Mul / GCDSize;
+}
+
+LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ const LLT OrigElt = OrigTy.getElementType();
+
+ if (TargetTy.isVector()) {
+ const LLT TargetElt = TargetTy.getElementType();
+
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ // Prefer the original element type.
+ int Mul = OrigTy.getNumElements() * TargetTy.getNumElements();
+ return LLT::vector(Mul / GCDElts, OrigTy.getElementType());
+ }
+ } else {
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigTy;
+ }
+
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
}
- if (TargetTy.isVector()) {
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
- return LLT::vector(LCMSize / OrigSize, OrigTy);
+ if (TargetTy.isVector()) {
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigSize, OrigTy);
}
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
-
- // Preserve pointer types.
- if (LCMSize == OrigSize)
- return OrigTy;
- if (LCMSize == TargetSize)
- return TargetTy;
-
- return LLT::scalar(LCMSize);
-}
-
-LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
- const unsigned OrigSize = OrigTy.getSizeInBits();
- const unsigned TargetSize = TargetTy.getSizeInBits();
-
- if (OrigSize == TargetSize)
- return OrigTy;
-
- if (OrigTy.isVector()) {
- LLT OrigElt = OrigTy.getElementType();
- if (TargetTy.isVector()) {
- LLT TargetElt = TargetTy.getElementType();
- if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
- return LLT::scalarOrVector(GCD, OrigElt);
- }
- } else {
- // If the source is a vector of pointers, return a pointer element.
- if (OrigElt.getSizeInBits() == TargetSize)
- return OrigElt;
- }
-
- unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
- if (GCD == OrigElt.getSizeInBits())
- return OrigElt;
-
- // If we can't produce the original element type, we have to use a smaller
- // scalar.
- if (GCD < OrigElt.getSizeInBits())
- return LLT::scalar(GCD);
- return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt);
- }
-
- if (TargetTy.isVector()) {
- // Try to preserve the original element type.
- LLT TargetElt = TargetTy.getElementType();
- if (TargetElt.getSizeInBits() == OrigSize)
- return OrigTy;
- }
-
- unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
- return LLT::scalar(GCD);
-}
-
-Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
- "Only G_SHUFFLE_VECTOR can have a splat index!");
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
-
- // If all elements are undefined, this shuffle can be considered a splat.
- // Return 0 for better potential for callers to simplify.
- if (FirstDefinedIdx == Mask.end())
- return 0;
-
- // Make sure all remaining elements are either undef or the same
- // as the first non-undef value.
- int SplatValue = *FirstDefinedIdx;
- if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
- [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
- return None;
-
- return SplatValue;
-}
-
-static bool isBuildVectorOp(unsigned Opcode) {
- return Opcode == TargetOpcode::G_BUILD_VECTOR ||
- Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
-}
-
-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return false;
-
- const unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
- return false;
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+
+ // Preserve pointer types.
+ if (LCMSize == OrigSize)
+ return OrigTy;
+ if (LCMSize == TargetSize)
+ return TargetTy;
+
+ return LLT::scalar(LCMSize);
+}
+
+LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ LLT OrigElt = OrigTy.getElementType();
+ if (TargetTy.isVector()) {
+ LLT TargetElt = TargetTy.getElementType();
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigElt);
+ }
+ } else {
+ // If the source is a vector of pointers, return a pointer element.
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigElt;
+ }
+
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ if (GCD == OrigElt.getSizeInBits())
+ return OrigElt;
+
+ // If we can't produce the original element type, we have to use a smaller
+ // scalar.
+ if (GCD < OrigElt.getSizeInBits())
+ return LLT::scalar(GCD);
+ return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt);
}
- return true;
-}
-
-Optional<int64_t>
-llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return None;
-
- const unsigned NumOps = MI.getNumOperands();
- Optional<int64_t> Scalar;
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- int64_t ElementValue;
- if (!mi_match(Element, MRI, m_ICst(ElementValue)))
- return None;
- if (!Scalar)
- Scalar = ElementValue;
- else if (*Scalar != ElementValue)
- return None;
+ if (TargetTy.isVector()) {
+ // Try to preserve the original element type.
+ LLT TargetElt = TargetTy.getElementType();
+ if (TargetElt.getSizeInBits() == OrigSize)
+ return OrigTy;
+ }
+
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ return LLT::scalar(GCD);
+}
+
+Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Only G_SHUFFLE_VECTOR can have a splat index!");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
+
+ // If all elements are undefined, this shuffle can be considered a splat.
+ // Return 0 for better potential for callers to simplify.
+ if (FirstDefinedIdx == Mask.end())
+ return 0;
+
+ // Make sure all remaining elements are either undef or the same
+ // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx;
+ if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
+ [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
+ return None;
+
+ return SplatValue;
+}
+
+static bool isBuildVectorOp(unsigned Opcode) {
+ return Opcode == TargetOpcode::G_BUILD_VECTOR ||
+ Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
+}
+
+// TODO: Handle mixed undef elements.
+static bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
+ return false;
}
- return Scalar;
-}
-
-bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, 0);
-}
-
-bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, -1);
-}
-
-bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
- bool IsFP) {
- switch (TLI.getBooleanContents(IsVector, IsFP)) {
- case TargetLowering::UndefinedBooleanContent:
- return Val & 0x1;
- case TargetLowering::ZeroOrOneBooleanContent:
- return Val == 1;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return Val == -1;
+ return true;
+}
+
+Optional<int64_t>
+llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return None;
+
+ const unsigned NumOps = MI.getNumOperands();
+ Optional<int64_t> Scalar;
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ int64_t ElementValue;
+ if (!mi_match(Element, MRI, m_ICst(ElementValue)))
+ return None;
+ if (!Scalar)
+ Scalar = ElementValue;
+ else if (*Scalar != ElementValue)
+ return None;
}
- llvm_unreachable("Invalid boolean contents");
-}
-int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
- bool IsFP) {
- switch (TLI.getBooleanContents(IsVector, IsFP)) {
- case TargetLowering::UndefinedBooleanContent:
- case TargetLowering::ZeroOrOneBooleanContent:
- return 1;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return -1;
- }
- llvm_unreachable("Invalid boolean contents");
-}
+ return Scalar;
+}
+
+bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, 0);
+}
+
+bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, -1);
+}
+
+bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Val == 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
index e6de0fe8d9..6ede6da277 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
@@ -12,16 +12,16 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/CodeGen
- contrib/libs/llvm12/lib/CodeGen/SelectionDAG
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target
- contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/CodeGen
+ contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target
+ contrib/libs/llvm12/lib/Transforms/Utils
)
ADDINCL(