author     shadchin <shadchin@yandex-team.ru>             2022-02-10 16:44:39 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>   2022-02-10 16:44:39 +0300
commit     e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree       64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/CodeGen/GlobalISel
parent     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download   ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/CodeGen/GlobalISel')
21 files changed, 5189 insertions, 5189 deletions
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp index 24391970d6..2fa208fbfa 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -59,7 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: - case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_EXTRACT: return true; } return false; @@ -367,21 +367,21 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { return *this; } -const GISelInstProfileBuilder & -GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { - LLT Ty = MRI.getType(Reg); - if (Ty.isValid()) - addNodeIDRegType(Ty); - - if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { - if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) - addNodeIDRegType(RB); - else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) - addNodeIDRegType(RC); - } - return *this; -} - +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { + LLT Ty = MRI.getType(Reg); + if (Ty.isValid()) + addNodeIDRegType(Ty); + + if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { + if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) + addNodeIDRegType(RB); + else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) + addNodeIDRegType(RC); + } + return *this; +} + const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { @@ -389,8 +389,8 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( if (!MO.isDef()) addNodeIDRegNum(Reg); - // Profile the register properties. - addNodeIDReg(Reg); + // Profile the register properties. + addNodeIDReg(Reg); assert(!MO.isImplicit() && "Unhandled case"); } else if (MO.isImm()) ID.AddInteger(MO.getImm()); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index b0f8a6610d..2c86f06a60 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -42,14 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, if (MI) { CSEInfo->countOpcodeHit(MI->getOpcode()); auto CurrPos = getInsertPt(); - auto MII = MachineBasicBlock::iterator(MI); - if (MII == CurrPos) { - // Move the insert point ahead of the instruction so any future uses of - // this builder will have the def ready. - setInsertPt(*CurMBB, std::next(MII)); - } else if (!dominates(MI, CurrPos)) { + auto MII = MachineBasicBlock::iterator(MI); + if (MII == CurrPos) { + // Move the insert point ahead of the instruction so any future uses of + // this builder will have the def ready. 
+ setInsertPt(*CurMBB, std::next(MII)); + } else if (!dominates(MI, CurrPos)) { CurMBB->splice(CurrPos, CurMBB, MI); - } + } return MachineInstrBuilder(getMF(), MI); } return MachineInstrBuilder(); @@ -68,11 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, case DstOp::DstType::Ty_RC: B.addNodeIDRegType(Op.getRegClass()); break; - case DstOp::DstType::Ty_Reg: { - // Regs can have LLT&(RB|RC). If those exist, profile them as well. - B.addNodeIDReg(Op.getReg()); - break; - } + case DstOp::DstType::Ty_Reg: { + // Regs can have LLT&(RB|RC). If those exist, profile them as well. + B.addNodeIDReg(Op.getReg()); + break; + } default: B.addNodeIDRegType(Op.getLLTTy(*getMRI())); break; @@ -82,9 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getSrcOpKind()) { - case SrcOp::SrcType::Ty_Imm: - B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm())); - break; + case SrcOp::SrcType::Ty_Imm: + B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm())); + break; case SrcOp::SrcType::Ty_Predicate: B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate())); break; @@ -130,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) { if (DstOps.size() == 1) return true; // always possible to emit copy to just 1 vreg. - return llvm::all_of(DstOps, [](const DstOp &Op) { + return llvm::all_of(DstOps, [](const DstOp &Op) { DstOp::DstType DT = Op.getDstOpKind(); return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC; }); @@ -146,21 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg) return buildCopy(Op.getReg(), MIB.getReg(0)); } - - // If we didn't generate a copy then we're re-using an existing node directly - // instead of emitting any code. Merge the debug location we wanted to emit - // into the instruction we're CSE'ing with. Debug locations arent part of the - // profile so we don't need to recompute it. - if (getDebugLoc()) { - GISelChangeObserver *Observer = getState().Observer; - if (Observer) - Observer->changingInstr(*MIB); - MIB->setDebugLoc( - DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); - if (Observer) - Observer->changedInstr(*MIB); - } - + + // If we didn't generate a copy then we're re-using an existing node directly + // instead of emitting any code. Merge the debug location we wanted to emit + // into the instruction we're CSE'ing with. Debug locations arent part of the + // profile so we don't need to recompute it. + if (getDebugLoc()) { + GISelChangeObserver *Observer = getState().Observer; + if (Observer) + Observer->changingInstr(*MIB); + MIB->setDebugLoc( + DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); + if (Observer) + Observer->changedInstr(*MIB); + } + return MIB; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp index ad7c789b2e..803e1527a4 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -30,51 +30,51 @@ using namespace llvm; void CallLowering::anchor() {} -/// Helper function which updates \p Flags when \p AttrFn returns true. 
-static void -addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, - const std::function<bool(Attribute::AttrKind)> &AttrFn) { - if (AttrFn(Attribute::SExt)) - Flags.setSExt(); - if (AttrFn(Attribute::ZExt)) - Flags.setZExt(); - if (AttrFn(Attribute::InReg)) - Flags.setInReg(); - if (AttrFn(Attribute::StructRet)) - Flags.setSRet(); - if (AttrFn(Attribute::Nest)) - Flags.setNest(); - if (AttrFn(Attribute::ByVal)) - Flags.setByVal(); - if (AttrFn(Attribute::Preallocated)) - Flags.setPreallocated(); - if (AttrFn(Attribute::InAlloca)) - Flags.setInAlloca(); - if (AttrFn(Attribute::Returned)) - Flags.setReturned(); - if (AttrFn(Attribute::SwiftSelf)) - Flags.setSwiftSelf(); - if (AttrFn(Attribute::SwiftError)) - Flags.setSwiftError(); -} - -ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, - unsigned ArgIdx) const { - ISD::ArgFlagsTy Flags; - addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { - return Call.paramHasAttr(ArgIdx, Attr); - }); - return Flags; -} - -void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, - const AttributeList &Attrs, - unsigned OpIdx) const { - addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { - return Attrs.hasAttribute(OpIdx, Attr); - }); -} - +/// Helper function which updates \p Flags when \p AttrFn returns true. +static void +addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, + const std::function<bool(Attribute::AttrKind)> &AttrFn) { + if (AttrFn(Attribute::SExt)) + Flags.setSExt(); + if (AttrFn(Attribute::ZExt)) + Flags.setZExt(); + if (AttrFn(Attribute::InReg)) + Flags.setInReg(); + if (AttrFn(Attribute::StructRet)) + Flags.setSRet(); + if (AttrFn(Attribute::Nest)) + Flags.setNest(); + if (AttrFn(Attribute::ByVal)) + Flags.setByVal(); + if (AttrFn(Attribute::Preallocated)) + Flags.setPreallocated(); + if (AttrFn(Attribute::InAlloca)) + Flags.setInAlloca(); + if (AttrFn(Attribute::Returned)) + Flags.setReturned(); + if (AttrFn(Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (AttrFn(Attribute::SwiftError)) + Flags.setSwiftError(); +} + +ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const { + ISD::ArgFlagsTy Flags; + addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { + return Call.paramHasAttr(ArgIdx, Attr); + }); + return Flags; +} + +void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const { + addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { + return Attrs.hasAttribute(OpIdx, Attr); + }); +} + bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArrayRef<Register> ResRegs, ArrayRef<ArrayRef<Register>> ArgRegs, @@ -82,45 +82,45 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, std::function<unsigned()> GetCalleeReg) const { CallLoweringInfo Info; const DataLayout &DL = MIRBuilder.getDataLayout(); - MachineFunction &MF = MIRBuilder.getMF(); - bool CanBeTailCalled = CB.isTailCall() && - isInTailCallPosition(CB, MF.getTarget()) && - (MF.getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsString() != "true"); - - CallingConv::ID CallConv = CB.getCallingConv(); - Type *RetTy = CB.getType(); - bool IsVarArg = CB.getFunctionType()->isVarArg(); - - SmallVector<BaseArgInfo, 4> SplitArgs; - getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); - Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); - - if (!Info.CanLowerReturn) { - // Callee 
requires sret demotion. - insertSRetOutgoingArgument(MIRBuilder, CB, Info); - - // The sret demotion isn't compatible with tail-calls, since the sret - // argument points into the caller's stack frame. - CanBeTailCalled = false; - } - + MachineFunction &MF = MIRBuilder.getMF(); + bool CanBeTailCalled = CB.isTailCall() && + isInTailCallPosition(CB, MF.getTarget()) && + (MF.getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); + + CallingConv::ID CallConv = CB.getCallingConv(); + Type *RetTy = CB.getType(); + bool IsVarArg = CB.getFunctionType()->isVarArg(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); + Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); + + if (!Info.CanLowerReturn) { + // Callee requires sret demotion. + insertSRetOutgoingArgument(MIRBuilder, CB, Info); + + // The sret demotion isn't compatible with tail-calls, since the sret + // argument points into the caller's stack frame. + CanBeTailCalled = false; + } + // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); for (auto &Arg : CB.args()) { - ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); - - // If we have an explicit sret argument that is an Instruction, (i.e., it - // might point to function-local memory), we can't meaningfully tail-call. - if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) - CanBeTailCalled = false; - + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. 
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) + CanBeTailCalled = false; + Info.OrigArgs.push_back(OrigArg); ++i; } @@ -133,16 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); - Info.CallConv = CallConv; + Info.CallConv = CallConv; Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CB.isMustTailCall(); - Info.IsTailCall = CanBeTailCalled; - Info.IsVarArg = IsVarArg; + Info.IsTailCall = CanBeTailCalled; + Info.IsVarArg = IsVarArg; return lowerCall(MIRBuilder, Info); } @@ -152,7 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const FuncInfoTy &FuncInfo) const { auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); - addArgFlagsFromAttributes(Flags, Attrs, OpIdx); + addArgFlagsFromAttributes(Flags, Attrs, OpIdx); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); @@ -245,97 +245,97 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { EVT CurVT = EVT::getEVT(Args[i].Ty); - if (CurVT.isSimple() && - !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), - CCValAssign::Full, Args[i], Args[i].Flags[0], - CCInfo)) - continue; - - MVT NewVT = TLI->getRegisterTypeForCallingConv( - F.getContext(), F.getCallingConv(), EVT(CurVT)); - - // If we need to split the type over multiple regs, check it's a scenario - // we currently support. - unsigned NumParts = TLI->getNumRegistersForCallingConv( - F.getContext(), F.getCallingConv(), CurVT); - - if (NumParts == 1) { - // Try to use the register type if we couldn't assign the VT. - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[0], CCInfo)) - return false; - continue; - } - - assert(NumParts > 1); - // For now only handle exact splits. - if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) - return false; - - // For incoming arguments (physregs to vregs), we could have values in - // physregs (or memlocs) which we want to extract and copy to vregs. - // During this, we might have to deal with the LLT being split across - // multiple regs, so we have to record this information for later. - // - // If we have outgoing args, then we have the opposite case. We have a - // vreg with an LLT which we want to assign to a physical location, and - // we might have to record that the value has to be split later. - if (Handler.isIncomingArgumentHandler()) { - // We're handling an incoming arg which is split over multiple regs. - // E.g. passing an s128 on AArch64. - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - Args[i].Regs.clear(); - Args[i].Flags.clear(); - LLT NewLLT = getLLTForMVT(NewVT); - // For each split register, create and assign a vreg that will store - // the incoming component of the larger value. These will later be - // merged to form the final vreg. 
- for (unsigned Part = 0; Part < NumParts; ++Part) { - Register Reg = - MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); - ISD::ArgFlagsTy Flags = OrigFlags; - if (Part == 0) { - Flags.setSplit(); + if (CurVT.isSimple() && + !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), + CCValAssign::Full, Args[i], Args[i].Flags[0], + CCInfo)) + continue; + + MVT NewVT = TLI->getRegisterTypeForCallingConv( + F.getContext(), F.getCallingConv(), EVT(CurVT)); + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + continue; + } + + assert(NumParts > 1); + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); } else { - Flags.setOrigAlign(Align(1)); - if (Part == NumParts - 1) - Flags.setSplitEnd(); + Flags.setOrigAlign(Align(1)); + if (Part == NumParts - 1) + Flags.setSplitEnd(); } - Args[i].Regs.push_back(Reg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[Part], CCInfo)) { - // Still couldn't assign this smaller part type for some reason. - return false; + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } else { + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. 
+ Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align(1)); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; } - } else { - // This type is passed via multiple registers in the calling convention. - // We need to extract the individual parts. - Register LargeReg = Args[i].Regs[0]; - LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); - auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); - assert(Unmerge->getNumOperands() == NumParts + 1); - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - // We're going to replace the regs and flags with the split ones. - Args[i].Regs.clear(); - Args[i].Flags.clear(); - for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { - ISD::ArgFlagsTy Flags = OrigFlags; - if (PartIdx == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (PartIdx == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[PartIdx], CCInfo)) - return false; - } } } @@ -361,239 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo, EVT VAVT = VA.getValVT(); const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); - // Expected to be multiple regs for a single incoming arg. - // There should be Regs.size() ArgLocs per argument. - unsigned NumArgRegs = Args[i].Regs.size(); - - assert((j + (NumArgRegs - 1)) < ArgLocs.size() && - "Too many regs for number of args"); - for (unsigned Part = 0; Part < NumArgRegs; ++Part) { - // There should be Regs.size() ArgLocs per argument. - VA = ArgLocs[j + Part]; - if (VA.isMemLoc()) { - // Don't currently support loading/storing a type that needs to be split - // to the stack. Should be easy, just not implemented yet. - if (NumArgRegs > 1) { - LLVM_DEBUG( - dbgs() - << "Load/store a split arg to/from the stack not implemented yet\n"); - return false; + // Expected to be multiple regs for a single incoming arg. + // There should be Regs.size() ArgLocs per argument. + unsigned NumArgRegs = Args[i].Regs.size(); + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (NumArgRegs > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet\n"); + return false; } - - // FIXME: Use correct address space for pointer size - EVT LocVT = VA.getValVT(); - unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize() - : LocVT.getStoreSize(); - unsigned Offset = VA.getLocMemOffset(); - MachinePointerInfo MPO; - Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); - Handler.assignValueToAddress(Args[i], StackAddr, - MemSize, MPO, VA); - continue; - } - - assert(VA.isRegLoc() && "custom loc should have been handled already"); - - // GlobalISel does not currently work for scalable vectors. 
- if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || - !Handler.isIncomingArgumentHandler()) { - // This is an argument that might have been split. There should be - // Regs.size() ArgLocs per argument. - - // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge - // to the original register after handling all of the parts. - Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); - continue; - } - - // This ArgLoc covers multiple pieces, so we need to split it. - const LLT VATy(VAVT.getSimpleVT()); - Register NewReg = - MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); - Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); - // If it's a vector type, we either need to truncate the elements - // or do an unmerge to get the lower block of elements. - if (VATy.isVector() && - VATy.getNumElements() > OrigVT.getVectorNumElements()) { - // Just handle the case where the VA type is 2 * original type. - if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { - LLVM_DEBUG(dbgs() - << "Incoming promoted vector arg has too many elts"); - return false; + + // FIXME: Use correct address space for pointer size + EVT LocVT = VA.getValVT(); + unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize() + : LocVT.getStoreSize(); + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); + Handler.assignValueToAddress(Args[i], StackAddr, + MemSize, MPO, VA); + continue; + } + + assert(VA.isRegLoc() && "custom loc should have been handled already"); + + // GlobalISel does not currently work for scalable vectors. + if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || + !Handler.isIncomingArgumentHandler()) { + // This is an argument that might have been split. There should be + // Regs.size() ArgLocs per argument. + + // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge + // to the original register after handling all of the parts. + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + continue; + } + + // This ArgLoc covers multiple pieces, so we need to split it. + const LLT VATy(VAVT.getSimpleVT()); + Register NewReg = + MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); + Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); + // If it's a vector type, we either need to truncate the elements + // or do an unmerge to get the lower block of elements. + if (VATy.isVector() && + VATy.getNumElements() > OrigVT.getVectorNumElements()) { + // Just handle the case where the VA type is 2 * original type. + if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { + LLVM_DEBUG(dbgs() + << "Incoming promoted vector arg has too many elts"); + return false; } - auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); - MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); + auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); + MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); } else { - MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); + MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } - } - - // Now that all pieces have been handled, re-pack any arguments into any - // wider, original registers. - if (Handler.isIncomingArgumentHandler()) { - if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { - assert(NumArgRegs >= 2); - - // Merge the split registers into the expected larger result vreg - // of the original call. 
- MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + } + + // Now that all pieces have been handled, re-pack any arguments into any + // wider, original registers. + if (Handler.isIncomingArgumentHandler()) { + if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { + assert(NumArgRegs >= 2); + + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); } - } - - j += NumArgRegs - 1; - } - - return true; -} - -void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, - ArrayRef<Register> VRegs, Register DemoteReg, - int FI) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const DataLayout &DL = MF.getDataLayout(); - - SmallVector<EVT, 4> SplitVTs; - SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); - - assert(VRegs.size() == SplitVTs.size()); - - unsigned NumValues = SplitVTs.size(); - Align BaseAlign = DL.getPrefTypeAlign(RetTy); - Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); - LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); - - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); - - for (unsigned I = 0; I < NumValues; ++I) { - Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); - auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MRI.getType(VRegs[I]).getSizeInBytes(), - commonAlignment(BaseAlign, Offsets[I])); - MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); - } -} - -void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, - ArrayRef<Register> VRegs, - Register DemoteReg) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const DataLayout &DL = MF.getDataLayout(); - - SmallVector<EVT, 4> SplitVTs; - SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); - - assert(VRegs.size() == SplitVTs.size()); - - unsigned NumValues = SplitVTs.size(); - Align BaseAlign = DL.getPrefTypeAlign(RetTy); - unsigned AS = DL.getAllocaAddrSpace(); - LLT OffsetLLTy = - getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); - - MachinePointerInfo PtrInfo(AS); - - for (unsigned I = 0; I < NumValues; ++I) { - Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); - auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - MRI.getType(VRegs[I]).getSizeInBytes(), - commonAlignment(BaseAlign, Offsets[I])); - MIRBuilder.buildStore(VRegs[I], Addr, *MMO); - } -} - -void CallLowering::insertSRetIncomingArgument( - const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, - MachineRegisterInfo &MRI, const DataLayout &DL) const { - unsigned AS = DL.getAllocaAddrSpace(); - DemoteReg = MRI.createGenericVirtualRegister( - LLT::pointer(AS, DL.getPointerSizeInBits(AS))); - - Type *PtrTy = PointerType::get(F.getReturnType(), AS); - - SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); - - // NOTE: Assume that a pointer won't get split into more than one VT. 
- assert(ValueVTs.size() == 1); - - ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); - setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); - DemoteArg.Flags[0].setSRet(); - SplitArgs.insert(SplitArgs.begin(), DemoteArg); -} - -void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, - const CallBase &CB, - CallLoweringInfo &Info) const { - const DataLayout &DL = MIRBuilder.getDataLayout(); - Type *RetTy = CB.getType(); - unsigned AS = DL.getAllocaAddrSpace(); - LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); - - int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( - DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); - - Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); - ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); - setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); - DemoteArg.Flags[0].setSRet(); - - Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); - Info.DemoteStackIndex = FI; - Info.DemoteRegister = DemoteReg; -} - -bool CallLowering::checkReturn(CCState &CCInfo, - SmallVectorImpl<BaseArgInfo> &Outs, - CCAssignFn *Fn) const { - for (unsigned I = 0, E = Outs.size(); I < E; ++I) { - MVT VT = MVT::getVT(Outs[I].Ty); - if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) + } + + j += NumArgRegs - 1; + } + + return true; +} + +void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, Register DemoteReg, + int FI) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = DL.getPrefTypeAlign(RetTy); + Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); + LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); + + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); + } +} + +void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, + Register DemoteReg) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = DL.getPrefTypeAlign(RetTy); + unsigned AS = DL.getAllocaAddrSpace(); + LLT OffsetLLTy = + getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); + + MachinePointerInfo PtrInfo(AS); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildStore(VRegs[I], Addr, *MMO); + } 
+} + +void CallLowering::insertSRetIncomingArgument( + const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, + MachineRegisterInfo &MRI, const DataLayout &DL) const { + unsigned AS = DL.getAllocaAddrSpace(); + DemoteReg = MRI.createGenericVirtualRegister( + LLT::pointer(AS, DL.getPointerSizeInBits(AS))); + + Type *PtrTy = PointerType::get(F.getReturnType(), AS); + + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); + + // NOTE: Assume that a pointer won't get split into more than one VT. + assert(ValueVTs.size() == 1); + + ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); + DemoteArg.Flags[0].setSRet(); + SplitArgs.insert(SplitArgs.begin(), DemoteArg); +} + +void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, + const CallBase &CB, + CallLoweringInfo &Info) const { + const DataLayout &DL = MIRBuilder.getDataLayout(); + Type *RetTy = CB.getType(); + unsigned AS = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + + int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( + DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); + + Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); + ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); + DemoteArg.Flags[0].setSRet(); + + Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); + Info.DemoteStackIndex = FI; + Info.DemoteRegister = DemoteReg; +} + +bool CallLowering::checkReturn(CCState &CCInfo, + SmallVectorImpl<BaseArgInfo> &Outs, + CCAssignFn *Fn) const { + for (unsigned I = 0, E = Outs.size(); I < E; ++I) { + MVT VT = MVT::getVT(Outs[I].Ty); + if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) return false; } return true; } -void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, - AttributeList Attrs, - SmallVectorImpl<BaseArgInfo> &Outs, - const DataLayout &DL) const { - LLVMContext &Context = RetTy->getContext(); - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - - SmallVector<EVT, 4> SplitVTs; - ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); - addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); - - for (EVT VT : SplitVTs) { - unsigned NumParts = - TLI->getNumRegistersForCallingConv(Context, CallConv, VT); - MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); - Type *PartTy = EVT(RegVT).getTypeForEVT(Context); - - for (unsigned I = 0; I < NumParts; ++I) { - Outs.emplace_back(PartTy, Flags); - } - } -} - -bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { - const auto &F = MF.getFunction(); - Type *ReturnType = F.getReturnType(); - CallingConv::ID CallConv = F.getCallingConv(); - - SmallVector<BaseArgInfo, 4> SplitArgs; - getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, - MF.getDataLayout()); - return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); -} - +void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, + AttributeList Attrs, + SmallVectorImpl<BaseArgInfo> &Outs, + const DataLayout &DL) const { + LLVMContext &Context = RetTy->getContext(); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + + SmallVector<EVT, 4> SplitVTs; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); + addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); + + for (EVT VT : SplitVTs) { + unsigned NumParts = + 
TLI->getNumRegistersForCallingConv(Context, CallConv, VT); + MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); + Type *PartTy = EVT(RegVT).getTypeForEVT(Context); + + for (unsigned I = 0; I < NumParts; ++I) { + Outs.emplace_back(PartTy, Flags); + } + } +} + +bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { + const auto &F = MF.getFunction(); + Type *ReturnType = F.getReturnType(); + CallingConv::ID CallConv = F.getCallingConv(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, + MF.getDataLayout()); + return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); +} + bool CallLowering::analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args, CCAssignFn &AssignFnFixed, @@ -611,58 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState, return true; } -bool CallLowering::parametersInCSRMatch( - const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, - const SmallVectorImpl<CCValAssign> &OutLocs, - const SmallVectorImpl<ArgInfo> &OutArgs) const { - for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; - // If it's not a register, it's fine. - if (!ArgLoc.isRegLoc()) - continue; - - MCRegister PhysReg = ArgLoc.getLocReg(); - - // Only look at callee-saved registers. - if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) - continue; - - LLVM_DEBUG( - dbgs() - << "... Call has an argument passed in a callee-saved register.\n"); - - // Check if it was copied from. - const ArgInfo &OutInfo = OutArgs[i]; - - if (OutInfo.Regs.size() > 1) { - LLVM_DEBUG( - dbgs() << "... Cannot handle arguments in multiple registers.\n"); - return false; - } - - // Check if we copy the register, walking through copies from virtual - // registers. Note that getDefIgnoringCopies does not ignore copies from - // physical registers. - MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); - if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { - LLVM_DEBUG( - dbgs() - << "... Parameter was not copied into a VReg, cannot tail call.\n"); - return false; - } - - // Got a copy. Verify that it's the same as the register we want. - Register CopyRHS = RegDef->getOperand(1).getReg(); - if (CopyRHS != PhysReg) { - LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " - "VReg, cannot tail call.\n"); - return false; - } - } - - return true; -} - +bool CallLowering::parametersInCSRMatch( + const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &OutLocs, + const SmallVectorImpl<ArgInfo> &OutArgs) const { + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) + continue; + + MCRegister PhysReg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + const ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. 
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. + Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != PhysReg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp index 86480b47e9..f1071d96e5 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Combiner.cpp @@ -153,8 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, MFChanged |= Changed; } while (Changed); - assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && - "CSEInfo is not consistent. Likely missing calls to " - "observer on mutations")); + assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && + "CSEInfo is not consistent. Likely missing calls to " + "observer on mutations")); return MFChanged; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 8ea55b6abd..a9353bdfb7 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -44,75 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, (void)this->KB; } -const TargetLowering &CombinerHelper::getTargetLowering() const { - return *Builder.getMF().getSubtarget().getTargetLowering(); -} - -/// \returns The little endian in-memory byte position of byte \p I in a -/// \p ByteWidth bytes wide type. -/// -/// E.g. Given a 4-byte type x, x[0] -> byte 0 -static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { - assert(I < ByteWidth && "I must be in [0, ByteWidth)"); - return I; -} - -/// \returns The big endian in-memory byte position of byte \p I in a -/// \p ByteWidth bytes wide type. -/// -/// E.g. Given a 4-byte type x, x[0] -> byte 3 -static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { - assert(I < ByteWidth && "I must be in [0, ByteWidth)"); - return ByteWidth - I - 1; -} - -/// Given a map from byte offsets in memory to indices in a load/store, -/// determine if that map corresponds to a little or big endian byte pattern. -/// -/// \param MemOffset2Idx maps memory offsets to address offsets. -/// \param LowestIdx is the lowest index in \p MemOffset2Idx. -/// -/// \returns true if the map corresponds to a big endian byte pattern, false -/// if it corresponds to a little endian byte pattern, and None otherwise. -/// -/// E.g. 
given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns -/// are as follows: -/// -/// AddrOffset Little endian Big endian -/// 0 0 3 -/// 1 1 2 -/// 2 2 1 -/// 3 3 0 -static Optional<bool> -isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, - int64_t LowestIdx) { - // Need at least two byte positions to decide on endianness. - unsigned Width = MemOffset2Idx.size(); - if (Width < 2) - return None; - bool BigEndian = true, LittleEndian = true; - for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { - auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); - if (MemOffsetAndIdx == MemOffset2Idx.end()) - return None; - const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; - assert(Idx >= 0 && "Expected non-negative byte offset?"); - LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); - BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); - if (!BigEndian && !LittleEndian) - return None; - } - - assert((BigEndian != LittleEndian) && - "Pattern cannot be both big and little endian!"); - return BigEndian; -} - -bool CombinerHelper::isLegalOrBeforeLegalizer( - const LegalityQuery &Query) const { - return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; -} - +const TargetLowering &CombinerHelper::getTargetLowering() const { + return *Builder.getMF().getSubtarget().getTargetLowering(); +} + +/// \returns The little endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 0 +static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return I; +} + +/// \returns The big endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 3 +static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return ByteWidth - I - 1; +} + +/// Given a map from byte offsets in memory to indices in a load/store, +/// determine if that map corresponds to a little or big endian byte pattern. +/// +/// \param MemOffset2Idx maps memory offsets to address offsets. +/// \param LowestIdx is the lowest index in \p MemOffset2Idx. +/// +/// \returns true if the map corresponds to a big endian byte pattern, false +/// if it corresponds to a little endian byte pattern, and None otherwise. +/// +/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns +/// are as follows: +/// +/// AddrOffset Little endian Big endian +/// 0 0 3 +/// 1 1 2 +/// 2 2 1 +/// 3 3 0 +static Optional<bool> +isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + int64_t LowestIdx) { + // Need at least two byte positions to decide on endianness. 
+ unsigned Width = MemOffset2Idx.size(); + if (Width < 2) + return None; + bool BigEndian = true, LittleEndian = true; + for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { + auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); + if (MemOffsetAndIdx == MemOffset2Idx.end()) + return None; + const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; + assert(Idx >= 0 && "Expected non-negative byte offset?"); + LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); + BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); + if (!BigEndian && !LittleEndian) + return None; + } + + assert((BigEndian != LittleEndian) && + "Pattern cannot be both big and little endian!"); + return BigEndian; +} + +bool CombinerHelper::isLegalOrBeforeLegalizer( + const LegalityQuery &Query) const { + return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; +} + void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); @@ -624,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) return false; - const MachineBasicBlock &MBB = *DefMI.getParent(); - auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { - return &MI == &DefMI || &MI == &UseMI; - }); - if (DefOrUse == MBB.end()) - llvm_unreachable("Block must contain both DefMI and UseMI!"); - return &*DefOrUse == &DefMI; + const MachineBasicBlock &MBB = *DefMI.getParent(); + auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { + return &MI == &DefMI || &MI == &UseMI; + }); + if (DefOrUse == MBB.end()) + llvm_unreachable("Block must contain both DefMI and UseMI!"); + return &*DefOrUse == &DefMI; } bool CombinerHelper::dominates(const MachineInstr &DefMI, @@ -645,101 +645,101 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI, return isPredecessor(DefMI, UseMI); } -bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { +bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register SrcReg = MI.getOperand(1).getReg(); + Register LoadUser = SrcReg; + + if (MRI.getType(SrcReg).isVector()) + return false; + + Register TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) + LoadUser = TruncSrc; + + uint64_t SizeInBits = MI.getOperand(2).getImm(); + // If the source is a G_SEXTLOAD from the same bit width, then we don't + // need any extend at all, just a truncate. + if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { + const auto &MMO = **LoadMI->memoperands_begin(); + // If truncating more than the original extended value, abort. + if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) + return false; + if (MMO.getSizeInBits() == SizeInBits) + return true; + } + return false; +} + +bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Builder.setInstrAndDebugLoc(MI); + Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchSextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + + // Only supports scalars for now. 
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + return false; + Register SrcReg = MI.getOperand(1).getReg(); - Register LoadUser = SrcReg; - - if (MRI.getType(SrcReg).isVector()) - return false; - - Register TruncSrc; - if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) - LoadUser = TruncSrc; - - uint64_t SizeInBits = MI.getOperand(2).getImm(); - // If the source is a G_SEXTLOAD from the same bit width, then we don't - // need any extend at all, just a truncate. - if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { - const auto &MMO = **LoadMI->memoperands_begin(); - // If truncating more than the original extended value, abort. - if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) - return false; - if (MMO.getSizeInBits() == SizeInBits) - return true; - } - return false; -} - -bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { + MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) + return false; + + // If the sign extend extends from a narrower width than the load's width, + // then we can narrow the load width when we combine to a G_SEXTLOAD. + auto &MMO = **LoadDef->memoperands_begin(); + // Don't do this for non-simple loads. + if (MMO.isAtomic() || MMO.isVolatile()) + return false; + + // Avoid widening the load at all. + unsigned NewSizeBits = + std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); + + // Don't generate G_SEXTLOADs with a < 1 byte width. + if (NewSizeBits < 8) + return false; + // Don't bother creating a non-power-2 sextload, it will likely be broken up + // anyway for most targets. + if (!isPowerOf2_32(NewSizeBits)) + return false; + MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); + return true; +} + +bool CombinerHelper::applySextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Builder.setInstrAndDebugLoc(MI); - Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + Register LoadReg; + unsigned ScalarSizeBits; + std::tie(LoadReg, ScalarSizeBits) = MatchInfo; + auto *LoadDef = MRI.getVRegDef(LoadReg); + assert(LoadDef && "Expected a load reg"); + + // If we have the following: + // %ld = G_LOAD %ptr, (load 2) + // %ext = G_SEXT_INREG %ld, 8 + // ==> + // %ld = G_SEXTLOAD %ptr (load 1) + + auto &MMO = **LoadDef->memoperands_begin(); + Builder.setInstrAndDebugLoc(MI); + auto &MF = Builder.getMF(); + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); + Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), + LoadDef->getOperand(1).getReg(), *NewMMO); MI.eraseFromParent(); return true; } -bool CombinerHelper::matchSextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - - // Only supports scalars for now. - if (MRI.getType(MI.getOperand(0).getReg()).isVector()) - return false; - - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); - if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) - return false; - - // If the sign extend extends from a narrower width than the load's width, - // then we can narrow the load width when we combine to a G_SEXTLOAD. 
- auto &MMO = **LoadDef->memoperands_begin(); - // Don't do this for non-simple loads. - if (MMO.isAtomic() || MMO.isVolatile()) - return false; - - // Avoid widening the load at all. - unsigned NewSizeBits = - std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); - - // Don't generate G_SEXTLOADs with a < 1 byte width. - if (NewSizeBits < 8) - return false; - // Don't bother creating a non-power-2 sextload, it will likely be broken up - // anyway for most targets. - if (!isPowerOf2_32(NewSizeBits)) - return false; - MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); - return true; -} - -bool CombinerHelper::applySextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Register LoadReg; - unsigned ScalarSizeBits; - std::tie(LoadReg, ScalarSizeBits) = MatchInfo; - auto *LoadDef = MRI.getVRegDef(LoadReg); - assert(LoadDef && "Expected a load reg"); - - // If we have the following: - // %ld = G_LOAD %ptr, (load 2) - // %ext = G_SEXT_INREG %ld, 8 - // ==> - // %ld = G_SEXTLOAD %ptr (load 1) - - auto &MMO = **LoadDef->memoperands_begin(); - Builder.setInstrAndDebugLoc(MI); - auto &MF = Builder.getMF(); - auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); - Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), - LoadDef->getOperand(1).getReg(), *NewMMO); - MI.eraseFromParent(); - return true; -} - bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); @@ -757,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, return false; LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - // FIXME: The following use traversal needs a bail out for patholigical cases. + // FIXME: The following use traversal needs a bail out for patholigical cases. for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -884,11 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; - // For now, no targets actually support these opcodes so don't waste time - // running these unless we're forced to for testing. - if (!ForceLegalIndexing) - return false; - + // For now, no targets actually support these opcodes so don't waste time + // running these unless we're forced to for testing. + if (!ForceLegalIndexing) + return false; + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, MatchInfo.Offset); if (!MatchInfo.IsPre && @@ -941,7 +941,7 @@ void CombinerHelper::applyCombineIndexedLoadStore( LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); } -bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { +bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::G_BR) return false; @@ -956,7 +956,7 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { // The above pattern does not have a fall through to the successor bb2, always // resulting in a branch no matter which path is taken. Here we try to find // and replace that pattern with conditional branch to bb3 and otherwise - // fallthrough to bb2. This is generally better for branch predictors. + // fallthrough to bb2. 
This is generally better for branch predictors. MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator BrIt(MI); @@ -968,36 +968,36 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. Also make sure - // that it isn't the same as the G_BR's target (otherwise, this will loop.) - MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); - return BrCondTarget != MI.getOperand(0).getMBB() && - MBB->isLayoutSuccessor(BrCondTarget); + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } -void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { +void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); - Builder.setInstrAndDebugLoc(*BrCond); - LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); - // FIXME: Does int/fp matter for this? If so, we might need to restrict - // this to i1 only since we might not know for sure what kind of - // compare generated the condition value. - auto True = Builder.buildConstant( - Ty, getICmpTrueVal(getTargetLowering(), false, false)); - auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); - - auto *FallthroughBB = BrCond->getOperand(1).getMBB(); - Observer.changingInstr(MI); - MI.getOperand(0).setMBB(FallthroughBB); - Observer.changedInstr(MI); - - // Change the conditional branch to use the inverted condition and - // new target block. + Builder.setInstrAndDebugLoc(*BrCond); + LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); + // FIXME: Does int/fp matter for this? If so, we might need to restrict + // this to i1 only since we might not know for sure what kind of + // compare generated the condition value. + auto True = Builder.buildConstant( + Ty, getICmpTrueVal(getTargetLowering(), false, false)); + auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); + + auto *FallthroughBB = BrCond->getOperand(1).getMBB(); + Observer.changingInstr(MI); + MI.getOperand(0).setMBB(FallthroughBB); + Observer.changedInstr(MI); + + // Change the conditional branch to use the inverted condition and + // new target block. 
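// A standalone sketch (not part of this diff) of the inversion built above with
// buildXor: XOR-ing an i1 condition with the "true" value flips it, so the
// conditional branch can target the old fallthrough block and fall through to
// the old branch target instead. The helper name below is illustrative.
constexpr bool InvertCond(bool C) { return static_cast<bool>(C ^ 1); }
static_assert(InvertCond(true) == false && InvertCond(false) == true,
              "xor with the i1 true value inverts the branch condition");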
Observer.changingInstr(*BrCond); - BrCond->getOperand(0).setReg(Xor.getReg(0)); + BrCond->getOperand(0).setReg(Xor.getReg(0)); BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); } @@ -1090,7 +1090,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); + APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -1442,11 +1442,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, } bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { - const unsigned Opc = MI.getOpcode(); + const unsigned Opc = MI.getOpcode(); // This combine is fairly complex so it's not written with a separate // matcher function. - assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || - Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); + assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || + Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; @@ -1457,11 +1457,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Len = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); - if (Opc != TargetOpcode::G_MEMSET) { + if (Opc != TargetOpcode::G_MEMSET) { assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); MemOp = *(++MMOIt); SrcAlign = MemOp->getBaseAlign(); @@ -1471,7 +1471,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. 
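// A standalone sketch (not part of this diff) of the splat constant that
// getMemsetValue builds above via APInt::getSplat: the memset byte replicated
// across the store width. The 32-bit helper below is illustrative only.
#include <cstdint>
constexpr std::uint32_t SplatByte32(std::uint32_t Byte) {
  return (Byte & 0xFFu) * 0x01010101u;
}
static_assert(SplatByte32(0xAB) == 0xABABABABu,
              "a memset of byte 0xAB stores 0xABABABAB per 32-bit chunk");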
- unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); + unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); @@ -1481,78 +1481,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { if (MaxLen && KnownLen > MaxLen) return false; - if (Opc == TargetOpcode::G_MEMCPY) + if (Opc == TargetOpcode::G_MEMCPY) return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (Opc == TargetOpcode::G_MEMMOVE) + if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (Opc == TargetOpcode::G_MEMSET) + if (Opc == TargetOpcode::G_MEMSET) return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); return false; } -static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, - const Register Op, - const MachineRegisterInfo &MRI) { - const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); - if (!MaybeCst) - return None; - - APFloat V = MaybeCst->getValueAPF(); - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode!"); - case TargetOpcode::G_FNEG: { - V.changeSign(); - return V; - } - case TargetOpcode::G_FABS: { - V.clearSign(); - return V; - } - case TargetOpcode::G_FPTRUNC: - break; - case TargetOpcode::G_FSQRT: { - bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(sqrt(V.convertToDouble())); - break; - } - case TargetOpcode::G_FLOG2: { - bool Unused; - V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); - V = APFloat(log2(V.convertToDouble())); - break; - } - } - // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, - // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, - // and `G_FLOG2` reach here. 
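// A standalone sketch (not part of this diff) of the constant folding that
// constantFoldFpUnary performs: when the operand is a G_FCONSTANT, the unary FP
// operation is evaluated directly and replaced by a new constant (the real code
// also folds G_FPTRUNC, G_FSQRT and G_FLOG2 through APFloat). Helper names are
// illustrative.
constexpr double FoldFNeg(double V) { return -V; }
constexpr double FoldFAbs(double V) { return V < 0 ? -V : V; }
static_assert(FoldFNeg(2.5) == -2.5 && FoldFAbs(-2.5) == 2.5,
              "G_FNEG/G_FABS of a constant fold to a new G_FCONSTANT");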
- bool Unused; - V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); - return V; -} - -bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); - return Cst.hasValue(); -} - -bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst) { - assert(Cst.hasValue() && "Optional is unexpectedly empty!"); - Builder.setInstrAndDebugLoc(MI); - MachineFunction &MF = Builder.getMF(); - auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); - Register DstReg = MI.getOperand(0).getReg(); - Builder.buildFConstant(DstReg, *FPVal); - MI.eraseFromParent(); - return true; -} - +static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, + const Register Op, + const MachineRegisterInfo &MRI) { + const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); + if (!MaybeCst) + return None; + + APFloat V = MaybeCst->getValueAPF(); + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_FNEG: { + V.changeSign(); + return V; + } + case TargetOpcode::G_FABS: { + V.clearSign(); + return V; + } + case TargetOpcode::G_FPTRUNC: + break; + case TargetOpcode::G_FSQRT: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(sqrt(V.convertToDouble())); + break; + } + case TargetOpcode::G_FLOG2: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(log2(V.convertToDouble())); + break; + } + } + // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, + // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, + // and `G_FLOG2` reach here. + bool Unused; + V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); + return V; +} + +bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); + return Cst.hasValue(); +} + +bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + assert(Cst.hasValue() && "Optional is unexpectedly empty!"); + Builder.setInstrAndDebugLoc(MI); + MachineFunction &MF = Builder.getMF(); + auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); + Register DstReg = MI.getOperand(0).getReg(); + Builder.buildFConstant(DstReg, *FPVal); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) { // We're trying to match the following pattern: @@ -1581,7 +1581,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return false; // Pass the combined immediate to the apply function. 
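// A standalone sketch (not part of this diff) of the G_PTR_ADD chain fold being
// matched here: two constant offsets applied in sequence collapse into one.
constexpr char Buf[16] = {};
static_assert(Buf + 3 + 4 == Buf + (3 + 4),
              "(base + imm1) + imm2 == base + (imm1 + imm2)");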
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); MatchInfo.Base = Base; return true; } @@ -1599,211 +1599,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { - // We're trying to match the following pattern with any of - // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: - // %t1 = SHIFT %base, G_CONSTANT imm1 - // %root = SHIFT %t1, G_CONSTANT imm2 - // --> - // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) - - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || - Opcode == TargetOpcode::G_USHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); - - Register Shl2 = MI.getOperand(1).getReg(); - Register Imm1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); - if (!MaybeImmVal) - return false; - - MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); - if (Shl2Def->getOpcode() != Opcode) - return false; - - Register Base = Shl2Def->getOperand(1).getReg(); - Register Imm2 = Shl2Def->getOperand(2).getReg(); - auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); - if (!MaybeImm2Val) - return false; - - // Pass the combined immediate to the apply function. - MatchInfo.Imm = - (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); - MatchInfo.Reg = Base; - - // There is no simple replacement for a saturating unsigned left shift that - // exceeds the scalar size. - if (Opcode == TargetOpcode::G_USHLSAT && - MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) - return false; - - return true; -} - -bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || - Opcode == TargetOpcode::G_USHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); - - Builder.setInstrAndDebugLoc(MI); - LLT Ty = MRI.getType(MI.getOperand(1).getReg()); - unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); - auto Imm = MatchInfo.Imm; - - if (Imm >= ScalarSizeInBits) { - // Any logical shift that exceeds scalar size will produce zero. - if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { - Builder.buildConstant(MI.getOperand(0), 0); - MI.eraseFromParent(); - return true; - } - // Arithmetic shift and saturating signed left shift have no effect beyond - // scalar size. - Imm = ScalarSizeInBits - 1; - } - - LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); - Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(MatchInfo.Reg); - MI.getOperand(2).setReg(NewImm); - Observer.changedInstr(MI); - return true; -} - -bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { - // We're trying to match the following pattern with any of - // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination - // with any of G_AND/G_OR/G_XOR logic instructions. 
- // %t1 = SHIFT %X, G_CONSTANT C0 - // %t2 = LOGIC %t1, %Y - // %root = SHIFT %t2, G_CONSTANT C1 - // --> - // %t3 = SHIFT %X, G_CONSTANT (C0+C1) - // %t4 = SHIFT %Y, G_CONSTANT C1 - // %root = LOGIC %t3, %t4 - unsigned ShiftOpcode = MI.getOpcode(); - assert((ShiftOpcode == TargetOpcode::G_SHL || - ShiftOpcode == TargetOpcode::G_ASHR || - ShiftOpcode == TargetOpcode::G_LSHR || - ShiftOpcode == TargetOpcode::G_USHLSAT || - ShiftOpcode == TargetOpcode::G_SSHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); - - // Match a one-use bitwise logic op. - Register LogicDest = MI.getOperand(1).getReg(); - if (!MRI.hasOneNonDBGUse(LogicDest)) - return false; - - MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); - unsigned LogicOpcode = LogicMI->getOpcode(); - if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && - LogicOpcode != TargetOpcode::G_XOR) - return false; - - // Find a matching one-use shift by constant. - const Register C1 = MI.getOperand(2).getReg(); - auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); - if (!MaybeImmVal) - return false; - - const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); - - auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { - // Shift should match previous one and should be a one-use. - if (MI->getOpcode() != ShiftOpcode || - !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) - return false; - - // Must be a constant. - auto MaybeImmVal = - getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); - if (!MaybeImmVal) - return false; - - ShiftVal = MaybeImmVal->Value.getSExtValue(); - return true; - }; - - // Logic ops are commutative, so check each operand for a match. - Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); - MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); - Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); - MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); - uint64_t C0Val; - - if (matchFirstShift(LogicMIOp1, C0Val)) { - MatchInfo.LogicNonShiftReg = LogicMIReg2; - MatchInfo.Shift2 = LogicMIOp1; - } else if (matchFirstShift(LogicMIOp2, C0Val)) { - MatchInfo.LogicNonShiftReg = LogicMIReg1; - MatchInfo.Shift2 = LogicMIOp2; - } else - return false; - - MatchInfo.ValSum = C0Val + C1Val; - - // The fold is not valid if the sum of the shift values exceeds bitwidth. 
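// A standalone sketch (not part of this diff) of the rewrite matched above: a
// left shift distributes over the bitwise logic op, so the two shifts can be
// merged on the shifted operand while the other operand is shifted once.
// The constants are arbitrary; the identity requires C0 + C1 < bitwidth.
static_assert((((0x5Au << 2) & 0xF0Fu) << 3) ==
                  ((0x5Au << (2 + 3)) & (0xF0Fu << 3)),
              "shl (and (shl x, C0), y), C1 == and (shl x, C0+C1), (shl y, C1)");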
- if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) - return false; - - MatchInfo.Logic = LogicMI; - return true; -} - -bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { - unsigned Opcode = MI.getOpcode(); - assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || - Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || - Opcode == TargetOpcode::G_SSHLSAT) && - "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); - - LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); - LLT DestType = MRI.getType(MI.getOperand(0).getReg()); - Builder.setInstrAndDebugLoc(MI); - - Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); - - Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); - Register Shift1 = - Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); - - Register Shift2Const = MI.getOperand(2).getReg(); - Register Shift2 = Builder - .buildInstr(Opcode, {DestType}, - {MatchInfo.LogicNonShiftReg, Shift2Const}) - .getReg(0); - - Register Dest = MI.getOperand(0).getReg(); - Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); - - // These were one use so it's safe to remove them. - MatchInfo.Shift2->eraseFromParent(); - MatchInfo.Logic->eraseFromParent(); - - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: + // %t1 = SHIFT %base, G_CONSTANT imm1 + // %root = SHIFT %t1, G_CONSTANT imm2 + // --> + // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) + + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Register Shl2 = MI.getOperand(1).getReg(); + Register Imm1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + if (!MaybeImmVal) + return false; + + MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); + if (Shl2Def->getOpcode() != Opcode) + return false; + + Register Base = Shl2Def->getOperand(1).getReg(); + Register Imm2 = Shl2Def->getOperand(2).getReg(); + auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + if (!MaybeImm2Val) + return false; + + // Pass the combined immediate to the apply function. + MatchInfo.Imm = + (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Reg = Base; + + // There is no simple replacement for a saturating unsigned left shift that + // exceeds the scalar size. 
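// A standalone sketch (not part of this diff) of the chained-shift fold matched
// here: two constant shifts of the same kind combine by adding the immediates,
// as long as the sum stays below the scalar bit width (the apply code handles
// the overflowing case separately by folding to zero or clamping).
static_assert(((0x1234u << 3) << 4) == (0x1234u << (3 + 4)),
              "shl (shl x, 3), 4 == shl x, 7");
static_assert(((0xF0F0u >> 3) >> 4) == (0xF0F0u >> (3 + 4)),
              "the same holds for logical right shifts");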
+ if (Opcode == TargetOpcode::G_USHLSAT && + MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) + return false; + + return true; +} + +bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Builder.setInstrAndDebugLoc(MI); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); + auto Imm = MatchInfo.Imm; + + if (Imm >= ScalarSizeInBits) { + // Any logical shift that exceeds scalar size will produce zero. + if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { + Builder.buildConstant(MI.getOperand(0), 0); + MI.eraseFromParent(); + return true; + } + // Arithmetic shift and saturating signed left shift have no effect beyond + // scalar size. + Imm = ScalarSizeInBits - 1; + } + + LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); + Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(MatchInfo.Reg); + MI.getOperand(2).setReg(NewImm); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination + // with any of G_AND/G_OR/G_XOR logic instructions. + // %t1 = SHIFT %X, G_CONSTANT C0 + // %t2 = LOGIC %t1, %Y + // %root = SHIFT %t2, G_CONSTANT C1 + // --> + // %t3 = SHIFT %X, G_CONSTANT (C0+C1) + // %t4 = SHIFT %Y, G_CONSTANT C1 + // %root = LOGIC %t3, %t4 + unsigned ShiftOpcode = MI.getOpcode(); + assert((ShiftOpcode == TargetOpcode::G_SHL || + ShiftOpcode == TargetOpcode::G_ASHR || + ShiftOpcode == TargetOpcode::G_LSHR || + ShiftOpcode == TargetOpcode::G_USHLSAT || + ShiftOpcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + // Match a one-use bitwise logic op. + Register LogicDest = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LogicDest)) + return false; + + MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); + unsigned LogicOpcode = LogicMI->getOpcode(); + if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && + LogicOpcode != TargetOpcode::G_XOR) + return false; + + // Find a matching one-use shift by constant. + const Register C1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); + if (!MaybeImmVal) + return false; + + const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); + + auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { + // Shift should match previous one and should be a one-use. + if (MI->getOpcode() != ShiftOpcode || + !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) + return false; + + // Must be a constant. + auto MaybeImmVal = + getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); + if (!MaybeImmVal) + return false; + + ShiftVal = MaybeImmVal->Value.getSExtValue(); + return true; + }; + + // Logic ops are commutative, so check each operand for a match. 
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); + MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); + Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); + MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); + uint64_t C0Val; + + if (matchFirstShift(LogicMIOp1, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg2; + MatchInfo.Shift2 = LogicMIOp1; + } else if (matchFirstShift(LogicMIOp2, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg1; + MatchInfo.Shift2 = LogicMIOp2; + } else + return false; + + MatchInfo.ValSum = C0Val + C1Val; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) + return false; + + MatchInfo.Logic = LogicMI; + return true; +} + +bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || + Opcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); + LLT DestType = MRI.getType(MI.getOperand(0).getReg()); + Builder.setInstrAndDebugLoc(MI); + + Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); + + Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); + Register Shift1 = + Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); + + Register Shift2Const = MI.getOperand(2).getReg(); + Register Shift2 = Builder + .buildInstr(Opcode, {DestType}, + {MatchInfo.LogicNonShiftReg, Shift2Const}) + .getReg(0); + + Register Dest = MI.getOperand(0).getReg(); + Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); + + // These were one use so it's safe to remove them. + MatchInfo.Shift2->eraseFromParent(); + MatchInfo.Logic->eraseFromParent(); + + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); - if (!MaybeImmVal) + if (!MaybeImmVal) return false; - - ShiftVal = MaybeImmVal->Value.exactLogBase2(); - return (static_cast<int32_t>(ShiftVal) != -1); + + ShiftVal = MaybeImmVal->Value.exactLogBase2(); + return (static_cast<int32_t>(ShiftVal) != -1); } bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, @@ -1819,254 +1819,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, return true; } -// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source -bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, - RegisterImmPair &MatchData) { - assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); - - Register LHS = MI.getOperand(1).getReg(); - - Register ExtSrc; - if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && - !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && - !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) - return false; - - // TODO: Should handle vector splat. - Register RHS = MI.getOperand(2).getReg(); - auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); - if (!MaybeShiftAmtVal) - return false; - - if (LI) { - LLT SrcTy = MRI.getType(ExtSrc); - - // We only really care about the legality with the shifted value. 
We can - // pick any type the constant shift amount, so ask the target what to - // use. Otherwise we would have to guess and hope it is reported as legal. - LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) - return false; - } - - int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); - MatchData.Reg = ExtSrc; - MatchData.Imm = ShiftAmt; - - unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); - return MinLeadingZeros >= ShiftAmt; -} - -bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, - const RegisterImmPair &MatchData) { - Register ExtSrcReg = MatchData.Reg; - int64_t ShiftAmtVal = MatchData.Imm; - - LLT ExtSrcTy = MRI.getType(ExtSrcReg); - Builder.setInstrAndDebugLoc(MI); - auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); - auto NarrowShift = - Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); - Builder.buildZExt(MI.getOperand(0), NarrowShift); - MI.eraseFromParent(); - return true; -} - -static Register peekThroughBitcast(Register Reg, - const MachineRegisterInfo &MRI) { - while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) - ; - - return Reg; -} - -bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - Register SrcReg = - peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); - - MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); - if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && - SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && - SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) - return false; - - // Check the source type of the merge. - LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); - LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); - bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); - if (SrcMergeTy != Dst0Ty && !SameSize) - return false; - // They are the same now (modulo a bitcast). - // We can collect all the src registers. 
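// A standalone sketch (not part of this diff) of why unmerge(merge a, b) can be
// replaced by the original sources: merging packs the inputs into adjacent bit
// ranges and unmerging just reads those ranges back. Helper names are made up.
constexpr unsigned Merge16(unsigned Lo, unsigned Hi) {
  return (Lo & 0xFFFFu) | ((Hi & 0xFFFFu) << 16);
}
static_assert((Merge16(0x1111u, 0x2222u) & 0xFFFFu) == 0x1111u &&
                  (Merge16(0x1111u, 0x2222u) >> 16) == 0x2222u,
              "G_UNMERGE_VALUES of G_MERGE_VALUES returns the original registers");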
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; - ++Idx) - Operands.push_back(SrcInstr->getOperand(Idx).getReg()); - return true; -} - -bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - assert((MI.getNumOperands() - 1 == Operands.size()) && - "Not enough operands to replace all defs"); - unsigned NumElems = MI.getNumOperands() - 1; - - LLT SrcTy = MRI.getType(Operands[0]); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - bool CanReuseInputDirectly = DstTy == SrcTy; - Builder.setInstrAndDebugLoc(MI); - for (unsigned Idx = 0; Idx < NumElems; ++Idx) { - Register DstReg = MI.getOperand(Idx).getReg(); - Register SrcReg = Operands[Idx]; - if (CanReuseInputDirectly) - replaceRegWith(MRI, DstReg, SrcReg); - else - Builder.buildCast(DstReg, SrcReg); - } - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { - unsigned SrcIdx = MI.getNumOperands() - 1; - Register SrcReg = MI.getOperand(SrcIdx).getReg(); - MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); - if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && - SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) - return false; - // Break down the big constant in smaller ones. - const MachineOperand &CstVal = SrcInstr->getOperand(1); - APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT - ? CstVal.getCImm()->getValue() - : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - - LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned ShiftAmt = Dst0Ty.getSizeInBits(); - // Unmerge a constant. - for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { - Csts.emplace_back(Val.trunc(ShiftAmt)); - Val = Val.lshr(ShiftAmt); - } - - return true; -} - -bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - assert((MI.getNumOperands() - 1 == Csts.size()) && - "Not enough operands to replace all defs"); - unsigned NumElems = MI.getNumOperands() - 1; - Builder.setInstrAndDebugLoc(MI); - for (unsigned Idx = 0; Idx < NumElems; ++Idx) { - Register DstReg = MI.getOperand(Idx).getReg(); - Builder.buildConstant(DstReg, Csts[Idx]); - } - - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - // Check that all the lanes are dead except the first one. - for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { - if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) - return false; - } - return true; -} - -bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { - Builder.setInstrAndDebugLoc(MI); - Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); - // Truncating a vector is going to truncate every single lane, - // whereas we want the full lowbits. - // Do the operation on a scalar instead. 
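// A standalone sketch (not part of this diff) of the dead-lanes case handled
// here: when only the first unmerge result is used, that result is just the low
// bits of the source, i.e. a plain truncate.
static_assert((0xAABBCCDDu & 0xFFFFu) == 0xCCDDu,
              "the first unmerge result equals a truncate of the source");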
- LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.isVector()) - SrcReg = - Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); - - Register Dst0Reg = MI.getOperand(0).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - if (Dst0Ty.isVector()) { - auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); - Builder.buildCast(Dst0Reg, MIB); - } else - Builder.buildTrunc(Dst0Reg, SrcReg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - Register Dst0Reg = MI.getOperand(0).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - // G_ZEXT on vector applies to each lane, so it will - // affect all destinations. Therefore we won't be able - // to simplify the unmerge to just the first definition. - if (Dst0Ty.isVector()) - return false; - Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.isVector()) - return false; - - Register ZExtSrcReg; - if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) - return false; - - // Finally we can replace the first definition with - // a zext of the source if the definition is big enough to hold - // all of ZExtSrc bits. - LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); - return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); -} - -bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && - "Expected an unmerge"); - - Register Dst0Reg = MI.getOperand(0).getReg(); - - MachineInstr *ZExtInstr = - MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); - assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && - "Expecting a G_ZEXT"); - - Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); - LLT Dst0Ty = MRI.getType(Dst0Reg); - LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); - - Builder.setInstrAndDebugLoc(MI); - - if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { - Builder.buildZExt(Dst0Reg, ZExtSrcReg); - } else { - assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && - "ZExt src doesn't fit in destination"); - replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); - } - - Register ZeroReg; - for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { - if (!ZeroReg) - ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); - replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); - } - MI.eraseFromParent(); - return true; -} - +// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source +bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, + RegisterImmPair &MatchData) { + assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); + + Register LHS = MI.getOperand(1).getReg(); + + Register ExtSrc; + if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) + return false; + + // TODO: Should handle vector splat. + Register RHS = MI.getOperand(2).getReg(); + auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); + if (!MaybeShiftAmtVal) + return false; + + if (LI) { + LLT SrcTy = MRI.getType(ExtSrc); + + // We only really care about the legality with the shifted value. We can + // pick any type the constant shift amount, so ask the target what to + // use. Otherwise we would have to guess and hope it is reported as legal. 
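// A standalone sketch (not part of this diff) of the identity behind
// shl ([sza]ext x), y => zext (shl x, y): when the shifted value still fits in
// the narrow source type (enough known leading zeros), shifting before or after
// the extension gives the same bits. ZExt8 below is an illustrative helper.
constexpr unsigned ZExt8(unsigned char X) { return X; }
static_assert((ZExt8(0x03) << 4) == ZExt8(0x03 << 4),
              "shl (zext x), 4 == zext (shl x, 4) when no source bits are shifted out");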
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) + return false; + } + + int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); + MatchData.Reg = ExtSrc; + MatchData.Imm = ShiftAmt; + + unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); + return MinLeadingZeros >= ShiftAmt; +} + +bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, + const RegisterImmPair &MatchData) { + Register ExtSrcReg = MatchData.Reg; + int64_t ShiftAmtVal = MatchData.Imm; + + LLT ExtSrcTy = MRI.getType(ExtSrcReg); + Builder.setInstrAndDebugLoc(MI); + auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); + auto NarrowShift = + Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); + Builder.buildZExt(MI.getOperand(0), NarrowShift); + MI.eraseFromParent(); + return true; +} + +static Register peekThroughBitcast(Register Reg, + const MachineRegisterInfo &MRI) { + while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) + ; + + return Reg; +} + +bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register SrcReg = + peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); + + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && + SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && + SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) + return false; + + // Check the source type of the merge. + LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); + if (SrcMergeTy != Dst0Ty && !SameSize) + return false; + // They are the same now (modulo a bitcast). + // We can collect all the src registers. + for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; + ++Idx) + Operands.push_back(SrcInstr->getOperand(Idx).getReg()); + return true; +} + +bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Operands.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + + LLT SrcTy = MRI.getType(Operands[0]); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + bool CanReuseInputDirectly = DstTy == SrcTy; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Register SrcReg = Operands[Idx]; + if (CanReuseInputDirectly) + replaceRegWith(MRI, DstReg, SrcReg); + else + Builder.buildCast(DstReg, SrcReg); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && + SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + // Break down the big constant in smaller ones. 
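// A standalone sketch (not part of this diff) of how a wide constant is broken
// into per-result constants below: each chunk is a trunc of the running value,
// which is then shifted right by the chunk width, mirroring the trunc/lshr loop.
constexpr unsigned Chunk16(unsigned long long V, unsigned Idx) {
  return static_cast<unsigned>((V >> (16 * Idx)) & 0xFFFFu);
}
static_assert(Chunk16(0x1122334455667788ull, 0) == 0x7788u &&
                  Chunk16(0x1122334455667788ull, 1) == 0x5566u &&
                  Chunk16(0x1122334455667788ull, 3) == 0x1122u,
              "unmerging a G_CONSTANT yields its 16-bit pieces as smaller constants");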
+ const MachineOperand &CstVal = SrcInstr->getOperand(1); + APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT + ? CstVal.getCImm()->getValue() + : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned ShiftAmt = Dst0Ty.getSizeInBits(); + // Unmerge a constant. + for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { + Csts.emplace_back(Val.trunc(ShiftAmt)); + Val = Val.lshr(ShiftAmt); + } + + return true; +} + +bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Csts.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Builder.buildConstant(DstReg, Csts[Idx]); + } + + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + // Check that all the lanes are dead except the first one. + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) + return false; + } + return true; +} + +bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + Builder.setInstrAndDebugLoc(MI); + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + // Truncating a vector is going to truncate every single lane, + // whereas we want the full lowbits. + // Do the operation on a scalar instead. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + SrcReg = + Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + if (Dst0Ty.isVector()) { + auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); + Builder.buildCast(Dst0Reg, MIB); + } else + Builder.buildTrunc(Dst0Reg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + // G_ZEXT on vector applies to each lane, so it will + // affect all destinations. Therefore we won't be able + // to simplify the unmerge to just the first definition. + if (Dst0Ty.isVector()) + return false; + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + return false; + + Register ZExtSrcReg; + if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) + return false; + + // Finally we can replace the first definition with + // a zext of the source if the definition is big enough to hold + // all of ZExtSrc bits. 
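// A standalone sketch (not part of this diff) of the unmerge-of-zext case
// matched here: when the zext source fits in the first result, that result is
// just the (possibly re-extended) source and every higher result is zero.
constexpr unsigned long long ZExt32To64(unsigned V) { return V; }
static_assert((ZExt32To64(0xDEADBEEFu) & 0xFFFFFFFFu) == 0xDEADBEEFu &&
                  (ZExt32To64(0xDEADBEEFu) >> 32) == 0u,
              "low unmerge result == zext source, high results == 0");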
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); +} + +bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + + Register Dst0Reg = MI.getOperand(0).getReg(); + + MachineInstr *ZExtInstr = + MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); + assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && + "Expecting a G_ZEXT"); + + Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + + Builder.setInstrAndDebugLoc(MI); + + if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { + Builder.buildZExt(Dst0Reg, ZExtSrcReg); + } else { + assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && + "ZExt src doesn't fit in destination"); + replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); + } + + Register ZeroReg; + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!ZeroReg) + ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); + replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { @@ -2088,7 +2088,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, if (!MaybeImmVal) return false; - ShiftVal = MaybeImmVal->Value.getSExtValue(); + ShiftVal = MaybeImmVal->Value.getSExtValue(); return ShiftVal >= Size / 2 && ShiftVal < Size; } @@ -2177,296 +2177,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, return false; } -bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, - m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); -} - -bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstr(MI); - Builder.buildCopy(DstReg, Reg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); -} - -bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstr(MI); - Builder.buildZExtOrTrunc(DstReg, Reg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { - assert(MI.getOpcode() == TargetOpcode::G_ADD); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT IntTy = MRI.getType(LHS); - - // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the - // instruction. 
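// A standalone sketch (not part of this diff) of the add(ptrtoint p, x) ->
// ptrtoint(ptr_add p, x) rewrite matched here. On flat address spaces with
// matching pointer/integer widths, offsetting the integer value of a pointer
// and converting the offset pointer give the same result. Runtime check only,
// since pointer/integer conversions are not constant-expression friendly.
#include <cassert>
#include <cstdint>
int main() {
  char Buf[8] = {};
  char *P = Buf;
  assert(reinterpret_cast<std::uintptr_t>(P) + 3 ==
         reinterpret_cast<std::uintptr_t>(P + 3));
  return 0;
}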
- PtrReg.second = false; - for (Register SrcReg : {LHS, RHS}) { - if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { - // Don't handle cases where the integer is implicitly converted to the - // pointer width. - LLT PtrTy = MRI.getType(PtrReg.first); - if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) - return true; - } - - PtrReg.second = true; - } - - return false; -} - -bool CombinerHelper::applyCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { - Register Dst = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - - const bool DoCommute = PtrReg.second; - if (DoCommute) - std::swap(LHS, RHS); - LHS = PtrReg.first; - - LLT PtrTy = MRI.getType(LHS); - - Builder.setInstrAndDebugLoc(MI); - auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); - Builder.buildPtrToInt(Dst, PtrAdd); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - - if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { - int64_t Cst; - if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { - NewCst = Cst + *RHSCst; - return true; - } - } - - return false; -} - -bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); - Register Dst = MI.getOperand(0).getReg(); - - Builder.setInstrAndDebugLoc(MI); - Builder.buildConstant(Dst, NewCst); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - return mi_match(SrcReg, MRI, - m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); -} - -bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); - Register DstReg = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, Reg); - return true; -} - -bool CombinerHelper::matchCombineExtOfExt( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || - MI.getOpcode() == TargetOpcode::G_SEXT || - MI.getOpcode() == TargetOpcode::G_ZEXT) && - "Expected a G_[ASZ]EXT"); - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 
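// A standalone sketch (not part of this diff) of the ext-of-ext folds matched
// here: a zero-extended value is non-negative, so a following sext (or another
// zext/anyext) adds only zero bits and the inner extend alone is enough.
// Helper names are illustrative.
constexpr unsigned ZExt8To32(unsigned char V) { return V; }
constexpr long long SExt32To64(int V) { return V; }
static_assert(SExt32To64(static_cast<int>(ZExt8To32(0xF0))) ==
                  static_cast<long long>(ZExt8To32(0xF0)),
              "sext (zext x) == zext x");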
- unsigned Opc = MI.getOpcode(); - unsigned SrcOpc = SrcMI->getOpcode(); - if (Opc == SrcOpc || - (Opc == TargetOpcode::G_ANYEXT && - (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || - (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { - MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); - return true; - } - return false; -} - -bool CombinerHelper::applyCombineExtOfExt( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { - assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || - MI.getOpcode() == TargetOpcode::G_SEXT || - MI.getOpcode() == TargetOpcode::G_ZEXT) && - "Expected a G_[ASZ]EXT"); - - Register Reg = std::get<0>(MatchInfo); - unsigned SrcExtOp = std::get<1>(MatchInfo); - - // Combine exts with the same opcode. - if (MI.getOpcode() == SrcExtOp) { - Observer.changingInstr(MI); - MI.getOperand(1).setReg(Reg); - Observer.changedInstr(MI); - return true; - } - - // Combine: - // - anyext([sz]ext x) to [sz]ext x - // - sext(zext x) to zext x - if (MI.getOpcode() == TargetOpcode::G_ANYEXT || - (MI.getOpcode() == TargetOpcode::G_SEXT && - SrcExtOp == TargetOpcode::G_ZEXT)) { - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInstrAndDebugLoc(MI); - Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); - MI.eraseFromParent(); - return true; - } - - return false; -} - -bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - - Builder.setInstrAndDebugLoc(MI); - Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, - MI.getFlags()); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); - Register SrcReg = MI.getOperand(1).getReg(); - return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); -} - -bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Src = MI.getOperand(1).getReg(); - Register AbsSrc; - return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); -} - -bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Register Dst = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, Dst, Src); - return true; -} - -bool CombinerHelper::matchCombineTruncOfExt( - MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - unsigned SrcOpc = SrcMI->getOpcode(); - if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || - SrcOpc == TargetOpcode::G_ZEXT) { - MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); - return true; - } - return false; -} - -bool CombinerHelper::applyCombineTruncOfExt( - MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register SrcReg = MatchInfo.first; - unsigned SrcExtOp = MatchInfo.second; - Register DstReg = MI.getOperand(0).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(DstReg); - if (SrcTy == DstTy) { - 
MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, SrcReg); - return true; - } - Builder.setInstrAndDebugLoc(MI); - if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) - Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); - else - Builder.buildTrunc(DstReg, SrcReg); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchCombineTruncOfShl( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register ShiftSrc; - Register ShiftAmt; - - if (MRI.hasOneNonDBGUse(SrcReg) && - mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && - isLegalOrBeforeLegalizer( - {TargetOpcode::G_SHL, - {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { - KnownBits Known = KB->getKnownBits(ShiftAmt); - unsigned Size = DstTy.getSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { - MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); - return true; - } - } - return false; -} - -bool CombinerHelper::applyCombineTruncOfShl( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - - Register ShiftSrc = MatchInfo.first; - Register ShiftAmt = MatchInfo.second; - Builder.setInstrAndDebugLoc(MI); - auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); - Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, + m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); +} + +bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildCopy(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); +} + +bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildZExtOrTrunc(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + assert(MI.getOpcode() == TargetOpcode::G_ADD); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT IntTy = MRI.getType(LHS); + + // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the + // instruction. 
+ PtrReg.second = false; + for (Register SrcReg : {LHS, RHS}) { + if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { + // Don't handle cases where the integer is implicitly converted to the + // pointer width. + LLT PtrTy = MRI.getType(PtrReg.first); + if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) + return true; + } + + PtrReg.second = true; + } + + return false; +} + +bool CombinerHelper::applyCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + Register Dst = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + const bool DoCommute = PtrReg.second; + if (DoCommute) + std::swap(LHS, RHS); + LHS = PtrReg.first; + + LLT PtrTy = MRI.getType(LHS); + + Builder.setInstrAndDebugLoc(MI); + auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); + Builder.buildPtrToInt(Dst, PtrAdd); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); + + if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { + int64_t Cst; + if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { + NewCst = Cst + *RHSCst; + return true; + } + } + + return false; +} + +bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register Dst = MI.getOperand(0).getReg(); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildConstant(Dst, NewCst); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + return mi_match(SrcReg, MRI, + m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); +} + +bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, Reg); + return true; +} + +bool CombinerHelper::matchCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 
+ unsigned Opc = MI.getOpcode(); + unsigned SrcOpc = SrcMI->getOpcode(); + if (Opc == SrcOpc || + (Opc == TargetOpcode::G_ANYEXT && + (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || + (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { + MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + + Register Reg = std::get<0>(MatchInfo); + unsigned SrcExtOp = std::get<1>(MatchInfo); + + // Combine exts with the same opcode. + if (MI.getOpcode() == SrcExtOp) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Reg); + Observer.changedInstr(MI); + return true; + } + + // Combine: + // - anyext([sz]ext x) to [sz]ext x + // - sext(zext x) to zext x + if (MI.getOpcode() == TargetOpcode::G_ANYEXT || + (MI.getOpcode() == TargetOpcode::G_SEXT && + SrcExtOp == TargetOpcode::G_ZEXT)) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, + MI.getFlags()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); +} + +bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Src = MI.getOperand(1).getReg(); + Register AbsSrc; + return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); +} + +bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Register Dst = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, Dst, Src); + return true; +} + +bool CombinerHelper::matchCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + unsigned SrcOpc = SrcMI->getOpcode(); + if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || + SrcOpc == TargetOpcode::G_ZEXT) { + MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg = MatchInfo.first; + unsigned SrcExtOp = MatchInfo.second; + Register DstReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); + if (SrcTy == DstTy) { + 
MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, SrcReg); + return true; + } + Builder.setInstrAndDebugLoc(MI); + if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) + Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); + else + Builder.buildTrunc(DstReg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register ShiftSrc; + Register ShiftAmt; + + if (MRI.hasOneNonDBGUse(SrcReg) && + mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && + isLegalOrBeforeLegalizer( + {TargetOpcode::G_SHL, + {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { + KnownBits Known = KB->getKnownBits(ShiftAmt); + unsigned Size = DstTy.getSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); + return true; + } + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + + Register ShiftSrc = MatchInfo.first; + Register ShiftAmt = MatchInfo.second; + Builder.setInstrAndDebugLoc(MI); + auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); + Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && @@ -2493,22 +2493,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) { MRI); } -bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SELECT); - return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), - MRI); -} - -bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { - assert(MI.getOpcode() == TargetOpcode::G_SELECT); - if (auto MaybeCstCmp = - getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { - OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2; - return true; - } - return false; -} - +bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), + MRI); +} + +bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + if (auto MaybeCstCmp = + getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { + OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; + return true; + } + return false; +} + bool CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); return true; @@ -2605,16 +2605,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, return true; } -bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, - Register Replacement) { - assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); - Register OldReg = MI.getOperand(0).getReg(); - assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); - MI.eraseFromParent(); - replaceRegWith(MRI, OldReg, Replacement); - return true; -} - +bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, + Register Replacement) { + assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); + Register OldReg = MI.getOperand(0).getReg(); + assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); + MI.eraseFromParent(); + replaceRegWith(MRI, OldReg, Replacement); + return true; +} + bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Match (cond ? x : x) @@ -2635,18 +2635,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { MRI); } -bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); - return MO.isReg() && - getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); -} - -bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, - unsigned OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); - return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); -} - +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); +} + +bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); +} + bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); @@ -2682,7 +2682,7 @@ bool CombinerHelper::matchSimplifyAddToSub( // ((0-A) + B) -> B - A // (A + (0-B)) -> A - B auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { - if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) + if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) return false; NewLHS = MaybeNewLHS; return true; @@ -2691,67 +2691,67 @@ bool CombinerHelper::matchSimplifyAddToSub( return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); } -bool CombinerHelper::matchCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && - "Invalid opcode"); - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); - unsigned NumElts = DstTy.getNumElements(); - // If this MI is part of a sequence of insert_vec_elts, then - // don't do the combine in the middle of the sequence. 
- if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == - TargetOpcode::G_INSERT_VECTOR_ELT) - return false; - MachineInstr *CurrInst = &MI; - MachineInstr *TmpInst; - int64_t IntImm; - Register TmpReg; - MatchInfo.resize(NumElts); - while (mi_match( - CurrInst->getOperand(0).getReg(), MRI, - m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { - if (IntImm >= NumElts) - return false; - if (!MatchInfo[IntImm]) - MatchInfo[IntImm] = TmpReg; - CurrInst = TmpInst; - } - // Variable index. - if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) - return false; - if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { - for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { - if (!MatchInfo[I - 1].isValid()) - MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); - } - return true; - } - // If we didn't end in a G_IMPLICIT_DEF, bail out. - return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; -} - -bool CombinerHelper::applyCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { - Builder.setInstr(MI); - Register UndefReg; - auto GetUndef = [&]() { - if (UndefReg) - return UndefReg; - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); - return UndefReg; - }; - for (unsigned I = 0; I < MatchInfo.size(); ++I) { - if (!MatchInfo[I]) - MatchInfo[I] = GetUndef(); - } - Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchCombineInsertVecElts( + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && + "Invalid opcode"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); + unsigned NumElts = DstTy.getNumElements(); + // If this MI is part of a sequence of insert_vec_elts, then + // don't do the combine in the middle of the sequence. + if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == + TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + MachineInstr *CurrInst = &MI; + MachineInstr *TmpInst; + int64_t IntImm; + Register TmpReg; + MatchInfo.resize(NumElts); + while (mi_match( + CurrInst->getOperand(0).getReg(), MRI, + m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { + if (IntImm >= NumElts) + return false; + if (!MatchInfo[IntImm]) + MatchInfo[IntImm] = TmpReg; + CurrInst = TmpInst; + } + // Variable index. + if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { + if (!MatchInfo[I - 1].isValid()) + MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); + } + return true; + } + // If we didn't end in a G_IMPLICIT_DEF, bail out. 
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; +} + +bool CombinerHelper::applyCombineInsertVecElts( + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + Builder.setInstr(MI); + Register UndefReg; + auto GetUndef = [&]() { + if (UndefReg) + return UndefReg; + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); + return UndefReg; + }; + for (unsigned I = 0; I < MatchInfo.size(); ++I) { + if (!MatchInfo[I]) + MatchInfo[I] = GetUndef(); + } + Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::applySimplifyAddToSub( MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { Builder.setInstr(MI); @@ -2762,812 +2762,812 @@ bool CombinerHelper::applySimplifyAddToSub( return true; } -bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { - // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... - // - // Creates the new hand + logic instruction (but does not insert them.) - // - // On success, MatchInfo is populated with the new instructions. These are - // inserted in applyHoistLogicOpWithSameOpcodeHands. - unsigned LogicOpcode = MI.getOpcode(); - assert(LogicOpcode == TargetOpcode::G_AND || - LogicOpcode == TargetOpcode::G_OR || - LogicOpcode == TargetOpcode::G_XOR); - MachineIRBuilder MIB(MI); - Register Dst = MI.getOperand(0).getReg(); - Register LHSReg = MI.getOperand(1).getReg(); - Register RHSReg = MI.getOperand(2).getReg(); - - // Don't recompute anything. - if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) - return false; - - // Make sure we have (hand x, ...), (hand y, ...) - MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); - MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); - if (!LeftHandInst || !RightHandInst) - return false; - unsigned HandOpcode = LeftHandInst->getOpcode(); - if (HandOpcode != RightHandInst->getOpcode()) - return false; - if (!LeftHandInst->getOperand(1).isReg() || - !RightHandInst->getOperand(1).isReg()) - return false; - - // Make sure the types match up, and if we're doing this post-legalization, - // we end up with legal types. - Register X = LeftHandInst->getOperand(1).getReg(); - Register Y = RightHandInst->getOperand(1).getReg(); - LLT XTy = MRI.getType(X); - LLT YTy = MRI.getType(Y); - if (XTy != YTy) - return false; - if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) - return false; - - // Optional extra source register. - Register ExtraHandOpSrcReg; - switch (HandOpcode) { - default: - return false; - case TargetOpcode::G_ANYEXT: - case TargetOpcode::G_SEXT: - case TargetOpcode::G_ZEXT: { - // Match: logic (ext X), (ext Y) --> ext (logic X, Y) - break; - } - case TargetOpcode::G_AND: - case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_SHL: { - // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z - MachineOperand &ZOp = LeftHandInst->getOperand(2); - if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) - return false; - ExtraHandOpSrcReg = ZOp.getReg(); - break; - } - } - - // Record the steps to build the new instructions. 
- // - // Steps to build (logic x, y) - auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); - OperandBuildSteps LogicBuildSteps = { - [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; - InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); - - // Steps to build hand (logic x, y), ...z - OperandBuildSteps HandBuildSteps = { - [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; - if (ExtraHandOpSrcReg.isValid()) - HandBuildSteps.push_back( - [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); - InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); - - MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); - return true; -} - -bool CombinerHelper::applyBuildInstructionSteps( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { - assert(MatchInfo.InstrsToBuild.size() && - "Expected at least one instr to build?"); - Builder.setInstr(MI); - for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { - assert(InstrToBuild.Opcode && "Expected a valid opcode?"); - assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); - MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); - for (auto &OperandFn : InstrToBuild.OperandFns) - OperandFn(Instr); - } - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchAshrShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR); - int64_t ShlCst, AshrCst; - Register Src; - // FIXME: detect splat constant vectors. - if (!mi_match(MI.getOperand(0).getReg(), MRI, - m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) - return false; - if (ShlCst != AshrCst) - return false; - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) - return false; - MatchInfo = std::make_tuple(Src, ShlCst); - return true; -} -bool CombinerHelper::applyAshShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR); - Register Src; - int64_t ShiftAmt; - std::tie(Src, ShiftAmt) = MatchInfo; - unsigned Size = MRI.getType(Src).getScalarSizeInBits(); - Builder.setInstrAndDebugLoc(MI); - Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, - Register &Replacement) { - // Given - // - // %y:_(sN) = G_SOMETHING - // %x:_(sN) = G_SOMETHING - // %res:_(sN) = G_AND %x, %y - // - // Eliminate the G_AND when it is known that x & y == x or x & y == y. - // - // Patterns like this can appear as a result of legalization. E.g. - // - // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y - // %one:_(s32) = G_CONSTANT i32 1 - // %and:_(s32) = G_AND %cmp, %one - // - // In this case, G_ICMP only produces a single bit, so x & 1 == x. - assert(MI.getOpcode() == TargetOpcode::G_AND); - if (!KB) - return false; - - Register AndDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(AndDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. - if (DstTy.isVector()) - return false; - - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - KnownBits LHSBits = KB->getKnownBits(LHS); - KnownBits RHSBits = KB->getKnownBits(RHS); - - // Check that x & Mask == x. 
- // x & 1 == x, always - // x & 0 == x, only if x is also 0 - // Meaning Mask has no effect if every bit is either one in Mask or zero in x. - // - // Check if we can replace AndDst with the LHS of the G_AND - if (canReplaceReg(AndDst, LHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { - Replacement = LHS; - return true; - } - - // Check if we can replace AndDst with the RHS of the G_AND - if (canReplaceReg(AndDst, RHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { - Replacement = RHS; - return true; - } - - return false; -} - -bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { - // Given - // - // %y:_(sN) = G_SOMETHING - // %x:_(sN) = G_SOMETHING - // %res:_(sN) = G_OR %x, %y - // - // Eliminate the G_OR when it is known that x | y == x or x | y == y. - assert(MI.getOpcode() == TargetOpcode::G_OR); - if (!KB) - return false; - - Register OrDst = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(OrDst); - - // FIXME: This should be removed once GISelKnownBits supports vectors. - if (DstTy.isVector()) - return false; - - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - KnownBits LHSBits = KB->getKnownBits(LHS); - KnownBits RHSBits = KB->getKnownBits(RHS); - - // Check that x | Mask == x. - // x | 0 == x, always - // x | 1 == x, only if x is also 1 - // Meaning Mask has no effect if every bit is either zero in Mask or one in x. - // - // Check if we can replace OrDst with the LHS of the G_OR - if (canReplaceReg(OrDst, LHS, MRI) && - (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { - Replacement = LHS; - return true; - } - - // Check if we can replace OrDst with the RHS of the G_OR - if (canReplaceReg(OrDst, RHS, MRI) && - (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { - Replacement = RHS; - return true; - } - - return false; -} - -bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { - // If the input is already sign extended, just drop the extension. - Register Src = MI.getOperand(1).getReg(); - unsigned ExtBits = MI.getOperand(2).getImm(); - unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); - return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); -} - -static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, - int64_t Cst, bool IsVector, bool IsFP) { - // For i1, Cst will always be -1 regardless of boolean contents. - return (ScalarSizeBits == 1 && Cst == -1) || - isConstTrueVal(TLI, Cst, IsVector, IsFP); -} - -bool CombinerHelper::matchNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { - assert(MI.getOpcode() == TargetOpcode::G_XOR); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); - Register XorSrc; - Register CstReg; - // We match xor(src, true) here. - if (!mi_match(MI.getOperand(0).getReg(), MRI, - m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) - return false; - - if (!MRI.hasOneNonDBGUse(XorSrc)) - return false; - - // Check that XorSrc is the root of a tree of comparisons combined with ANDs - // and ORs. The suffix of RegsToNegate starting from index I is used a work - // list of tree nodes to visit. - RegsToNegate.push_back(XorSrc); - // Remember whether the comparisons are all integer or all floating point. 
- bool IsInt = false; - bool IsFP = false; - for (unsigned I = 0; I < RegsToNegate.size(); ++I) { - Register Reg = RegsToNegate[I]; - if (!MRI.hasOneNonDBGUse(Reg)) - return false; - MachineInstr *Def = MRI.getVRegDef(Reg); - switch (Def->getOpcode()) { - default: - // Don't match if the tree contains anything other than ANDs, ORs and - // comparisons. - return false; - case TargetOpcode::G_ICMP: - if (IsFP) - return false; - IsInt = true; - // When we apply the combine we will invert the predicate. - break; - case TargetOpcode::G_FCMP: - if (IsInt) - return false; - IsFP = true; - // When we apply the combine we will invert the predicate. - break; - case TargetOpcode::G_AND: - case TargetOpcode::G_OR: - // Implement De Morgan's laws: - // ~(x & y) -> ~x | ~y - // ~(x | y) -> ~x & ~y - // When we apply the combine we will change the opcode and recursively - // negate the operands. - RegsToNegate.push_back(Def->getOperand(1).getReg()); - RegsToNegate.push_back(Def->getOperand(2).getReg()); - break; - } - } - - // Now we know whether the comparisons are integer or floating point, check - // the constant in the xor. - int64_t Cst; - if (Ty.isVector()) { - MachineInstr *CstDef = MRI.getVRegDef(CstReg); - auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); - if (!MaybeCst) - return false; - if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) - return false; - } else { - if (!mi_match(CstReg, MRI, m_ICst(Cst))) - return false; - if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) - return false; - } - - return true; -} - -bool CombinerHelper::applyNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { - for (Register Reg : RegsToNegate) { - MachineInstr *Def = MRI.getVRegDef(Reg); - Observer.changingInstr(*Def); - // For each comparison, invert the opcode. For each AND and OR, change the - // opcode. - switch (Def->getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_ICMP: - case TargetOpcode::G_FCMP: { - MachineOperand &PredOp = Def->getOperand(1); - CmpInst::Predicate NewP = CmpInst::getInversePredicate( - (CmpInst::Predicate)PredOp.getPredicate()); - PredOp.setPredicate(NewP); - break; - } - case TargetOpcode::G_AND: - Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); - break; - case TargetOpcode::G_OR: - Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); - break; - } - Observer.changedInstr(*Def); - } - - replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); - MI.eraseFromParent(); - return true; -} - -bool CombinerHelper::matchXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - // Match (xor (and x, y), y) (or any of its commuted cases) - assert(MI.getOpcode() == TargetOpcode::G_XOR); - Register &X = MatchInfo.first; - Register &Y = MatchInfo.second; - Register AndReg = MI.getOperand(1).getReg(); - Register SharedReg = MI.getOperand(2).getReg(); - - // Find a G_AND on either side of the G_XOR. - // Look for one of - // - // (xor (and x, y), SharedReg) - // (xor SharedReg, (and x, y)) - if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { - std::swap(AndReg, SharedReg); - if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) - return false; - } - - // Only do this if we'll eliminate the G_AND. - if (!MRI.hasOneNonDBGUse(AndReg)) - return false; - - // We can combine if SharedReg is the same as either the LHS or RHS of the - // G_AND. 
- if (Y != SharedReg) - std::swap(X, Y); - return Y == SharedReg; -} - -bool CombinerHelper::applyXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { - // Fold (xor (and x, y), y) -> (and (not x), y) - Builder.setInstrAndDebugLoc(MI); - Register X, Y; - std::tie(X, Y) = MatchInfo; - auto Not = Builder.buildNot(MRI.getType(X), X); - Observer.changingInstr(MI); - MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); - MI.getOperand(1).setReg(Not->getOperand(0).getReg()); - MI.getOperand(2).setReg(Y); - Observer.changedInstr(MI); - return true; -} - -bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(DstReg); - const DataLayout &DL = Builder.getMF().getDataLayout(); - - if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) - return false; - - if (Ty.isPointer()) { - auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); - return ConstVal && *ConstVal == 0; - } - - assert(Ty.isVector() && "Expecting a vector type"); - const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); - return isBuildVectorAllZeros(*VecMI, MRI); -} - -bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); - Builder.setInstrAndDebugLoc(MI); - Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); - MI.eraseFromParent(); - return true; -} - -/// The second source operand is known to be a power of 2. -bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Pow2Src1 = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(DstReg); - Builder.setInstrAndDebugLoc(MI); - - // Fold (urem x, pow2) -> (and x, pow2-1) - auto NegOne = Builder.buildConstant(Ty, -1); - auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); - Builder.buildAnd(DstReg, Src0, Add); - MI.eraseFromParent(); - return true; -} - -Optional<SmallVector<Register, 8>> -CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { - assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); - // We want to detect if Root is part of a tree which represents a bunch - // of loads being merged into a larger load. We'll try to recognize patterns - // like, for example: - // - // Reg Reg - // \ / - // OR_1 Reg - // \ / - // OR_2 - // \ Reg - // .. / - // Root - // - // Reg Reg Reg Reg - // \ / \ / - // OR_1 OR_2 - // \ / - // \ / - // ... - // Root - // - // Each "Reg" may have been produced by a load + some arithmetic. This - // function will save each of them. - SmallVector<Register, 8> RegsToVisit; - SmallVector<const MachineInstr *, 7> Ors = {Root}; - - // In the "worst" case, we're dealing with a load for each byte. So, there - // are at most #bytes - 1 ORs. - const unsigned MaxIter = - MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; - for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { - if (Ors.empty()) - break; - const MachineInstr *Curr = Ors.pop_back_val(); - Register OrLHS = Curr->getOperand(1).getReg(); - Register OrRHS = Curr->getOperand(2).getReg(); - - // In the combine, we want to elimate the entire tree. - if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) - return None; - - // If it's a G_OR, save it and continue to walk. If it's not, then it's - // something that may be a load + arithmetic. 
- if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) - Ors.push_back(Or); - else - RegsToVisit.push_back(OrLHS); - if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) - Ors.push_back(Or); - else - RegsToVisit.push_back(OrRHS); - } - - // We're going to try and merge each register into a wider power-of-2 type, - // so we ought to have an even number of registers. - if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) - return None; - return RegsToVisit; -} - -/// Helper function for findLoadOffsetsForLoadOrCombine. -/// -/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, -/// and then moving that value into a specific byte offset. -/// -/// e.g. x[i] << 24 -/// -/// \returns The load instruction and the byte offset it is moved into. -static Optional<std::pair<MachineInstr *, int64_t>> -matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, - const MachineRegisterInfo &MRI) { - assert(MRI.hasOneNonDBGUse(Reg) && - "Expected Reg to only have one non-debug use?"); - Register MaybeLoad; - int64_t Shift; - if (!mi_match(Reg, MRI, - m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { - Shift = 0; - MaybeLoad = Reg; - } - - if (Shift % MemSizeInBits != 0) - return None; - - // TODO: Handle other types of loads. - auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); - if (!Load) - return None; - - const auto &MMO = **Load->memoperands_begin(); - if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) - return None; - - return std::make_pair(Load, Shift / MemSizeInBits); -} - -Optional<std::pair<MachineInstr *, int64_t>> -CombinerHelper::findLoadOffsetsForLoadOrCombine( - SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, - const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { - - // Each load found for the pattern. There should be one for each RegsToVisit. - SmallSetVector<const MachineInstr *, 8> Loads; - - // The lowest index used in any load. (The lowest "i" for each x[i].) - int64_t LowestIdx = INT64_MAX; - - // The load which uses the lowest index. - MachineInstr *LowestIdxLoad = nullptr; - - // Keeps track of the load indices we see. We shouldn't see any indices twice. - SmallSet<int64_t, 8> SeenIdx; - - // Ensure each load is in the same MBB. - // TODO: Support multiple MachineBasicBlocks. - MachineBasicBlock *MBB = nullptr; - const MachineMemOperand *MMO = nullptr; - - // Earliest instruction-order load in the pattern. - MachineInstr *EarliestLoad = nullptr; - - // Latest instruction-order load in the pattern. - MachineInstr *LatestLoad = nullptr; - - // Base pointer which every load should share. - Register BasePtr; - - // We want to find a load for each register. Each load should have some - // appropriate bit twiddling arithmetic. During this loop, we will also keep - // track of the load which uses the lowest index. Later, we will check if we - // can use its pointer in the final, combined load. - for (auto Reg : RegsToVisit) { - // Find the load, and find the position that it will end up in (e.g. a - // shifted) value. - auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); - if (!LoadAndPos) - return None; - MachineInstr *Load; - int64_t DstPos; - std::tie(Load, DstPos) = *LoadAndPos; - - // TODO: Handle multiple MachineBasicBlocks. Currently not handled because - // it is difficult to check for stores/calls/etc between loads. 
- MachineBasicBlock *LoadMBB = Load->getParent(); - if (!MBB) - MBB = LoadMBB; - if (LoadMBB != MBB) - return None; - - // Make sure that the MachineMemOperands of every seen load are compatible. - const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); - if (!MMO) - MMO = LoadMMO; - if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) - return None; - - // Find out what the base pointer and index for the load is. - Register LoadPtr; - int64_t Idx; - if (!mi_match(Load->getOperand(1).getReg(), MRI, - m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { - LoadPtr = Load->getOperand(1).getReg(); - Idx = 0; - } - - // Don't combine things like a[i], a[i] -> a bigger load. - if (!SeenIdx.insert(Idx).second) - return None; - - // Every load must share the same base pointer; don't combine things like: - // - // a[i], b[i + 1] -> a bigger load. - if (!BasePtr.isValid()) - BasePtr = LoadPtr; - if (BasePtr != LoadPtr) - return None; - - if (Idx < LowestIdx) { - LowestIdx = Idx; - LowestIdxLoad = Load; - } - - // Keep track of the byte offset that this load ends up at. If we have seen - // the byte offset, then stop here. We do not want to combine: - // - // a[i] << 16, a[i + k] << 16 -> a bigger load. - if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) - return None; - Loads.insert(Load); - - // Keep track of the position of the earliest/latest loads in the pattern. - // We will check that there are no load fold barriers between them later - // on. - // - // FIXME: Is there a better way to check for load fold barriers? - if (!EarliestLoad || dominates(*Load, *EarliestLoad)) - EarliestLoad = Load; - if (!LatestLoad || dominates(*LatestLoad, *Load)) - LatestLoad = Load; - } - - // We found a load for each register. Let's check if each load satisfies the - // pattern. - assert(Loads.size() == RegsToVisit.size() && - "Expected to find a load for each register?"); - assert(EarliestLoad != LatestLoad && EarliestLoad && - LatestLoad && "Expected at least two loads?"); - - // Check if there are any stores, calls, etc. between any of the loads. If - // there are, then we can't safely perform the combine. - // - // MaxIter is chosen based off the (worst case) number of iterations it - // typically takes to succeed in the LLVM test suite plus some padding. - // - // FIXME: Is there a better way to check for load fold barriers? - const unsigned MaxIter = 20; - unsigned Iter = 0; - for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), - LatestLoad->getIterator())) { - if (Loads.count(&MI)) - continue; - if (MI.isLoadFoldBarrier()) - return None; - if (Iter++ == MaxIter) - return None; - } - - return std::make_pair(LowestIdxLoad, LowestIdx); -} - -bool CombinerHelper::matchLoadOrCombine( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_OR); - MachineFunction &MF = *MI.getMF(); - // Assuming a little-endian target, transform: - // s8 *a = ... - // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) - // => - // s32 val = *((i32)a) - // - // s8 *a = ... - // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] - // => - // s32 val = BSWAP(*((s32)a)) - Register Dst = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Dst); - if (Ty.isVector()) - return false; - - // We need to combine at least two loads into this type. Since the smallest - // possible load is into a byte, we need at least a 16-bit wide type. 
- const unsigned WideMemSizeInBits = Ty.getSizeInBits(); - if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) - return false; - - // Match a collection of non-OR instructions in the pattern. - auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); - if (!RegsToVisit) - return false; - - // We have a collection of non-OR instructions. Figure out how wide each of - // the small loads should be based off of the number of potential loads we - // found. - const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); - if (NarrowMemSizeInBits % 8 != 0) - return false; - - // Check if each register feeding into each OR is a load from the same - // base pointer + some arithmetic. - // - // e.g. a[0], a[1] << 8, a[2] << 16, etc. - // - // Also verify that each of these ends up putting a[i] into the same memory - // offset as a load into a wide type would. - SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; - MachineInstr *LowestIdxLoad; - int64_t LowestIdx; - auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( - MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); - if (!MaybeLoadInfo) - return false; - std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; - - // We have a bunch of loads being OR'd together. Using the addresses + offsets - // we found before, check if this corresponds to a big or little endian byte - // pattern. If it does, then we can represent it using a load + possibly a - // BSWAP. - bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); - Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); - if (!IsBigEndian.hasValue()) - return false; - bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; - if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) - return false; - - // Make sure that the load from the lowest index produces offset 0 in the - // final value. - // - // This ensures that we won't combine something like this: - // - // load x[i] -> byte 2 - // load x[i+1] -> byte 0 ---> wide_load x[i] - // load x[i+2] -> byte 1 - const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; - const unsigned ZeroByteOffset = - *IsBigEndian - ? bigEndianByteAt(NumLoadsInTy, 0) - : littleEndianByteAt(NumLoadsInTy, 0); - auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); - if (ZeroOffsetIdx == MemOffset2Idx.end() || - ZeroOffsetIdx->second != LowestIdx) - return false; - - // We wil reuse the pointer from the load which ends up at byte offset 0. It - // may not use index 0. - Register Ptr = LowestIdxLoad->getOperand(1).getReg(); - const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); - LegalityQuery::MemDesc MMDesc; - MMDesc.SizeInBits = WideMemSizeInBits; - MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getOrdering(); - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) - return false; - auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); - - // Load must be allowed and fast on the target. - LLVMContext &C = MF.getFunction().getContext(); - auto &DL = MF.getDataLayout(); - bool Fast = false; - if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || - !Fast) - return false; - - MatchInfo = [=](MachineIRBuilder &MIB) { - Register LoadDst = NeedsBSwap ? 
MRI.cloneVirtualRegister(Dst) : Dst; - MIB.buildLoad(LoadDst, Ptr, *NewMMO); - if (NeedsBSwap) - MIB.buildBSwap(Dst, LoadDst); - }; - return true; -} - -bool CombinerHelper::applyLoadOrCombine( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - Builder.setInstrAndDebugLoc(MI); - MatchInfo(Builder); - MI.eraseFromParent(); - return true; -} - +bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... + // + // Creates the new hand + logic instruction (but does not insert them.) + // + // On success, MatchInfo is populated with the new instructions. These are + // inserted in applyHoistLogicOpWithSameOpcodeHands. + unsigned LogicOpcode = MI.getOpcode(); + assert(LogicOpcode == TargetOpcode::G_AND || + LogicOpcode == TargetOpcode::G_OR || + LogicOpcode == TargetOpcode::G_XOR); + MachineIRBuilder MIB(MI); + Register Dst = MI.getOperand(0).getReg(); + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + // Don't recompute anything. + if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) + return false; + + // Make sure we have (hand x, ...), (hand y, ...) + MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); + MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); + if (!LeftHandInst || !RightHandInst) + return false; + unsigned HandOpcode = LeftHandInst->getOpcode(); + if (HandOpcode != RightHandInst->getOpcode()) + return false; + if (!LeftHandInst->getOperand(1).isReg() || + !RightHandInst->getOperand(1).isReg()) + return false; + + // Make sure the types match up, and if we're doing this post-legalization, + // we end up with legal types. + Register X = LeftHandInst->getOperand(1).getReg(); + Register Y = RightHandInst->getOperand(1).getReg(); + LLT XTy = MRI.getType(X); + LLT YTy = MRI.getType(Y); + if (XTy != YTy) + return false; + if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) + return false; + + // Optional extra source register. + Register ExtraHandOpSrcReg; + switch (HandOpcode) { + default: + return false; + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: { + // Match: logic (ext X), (ext Y) --> ext (logic X, Y) + break; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z + MachineOperand &ZOp = LeftHandInst->getOperand(2); + if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) + return false; + ExtraHandOpSrcReg = ZOp.getReg(); + break; + } + } + + // Record the steps to build the new instructions. 
+ // + // Steps to build (logic x, y) + auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); + OperandBuildSteps LogicBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; + InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); + + // Steps to build hand (logic x, y), ...z + OperandBuildSteps HandBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; + if (ExtraHandOpSrcReg.isValid()) + HandBuildSteps.push_back( + [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); + InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); + + MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); + return true; +} + +bool CombinerHelper::applyBuildInstructionSteps( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + assert(MatchInfo.InstrsToBuild.size() && + "Expected at least one instr to build?"); + Builder.setInstr(MI); + for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { + assert(InstrToBuild.Opcode && "Expected a valid opcode?"); + assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); + MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); + for (auto &OperandFn : InstrToBuild.OperandFns) + OperandFn(Instr); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchAshrShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + int64_t ShlCst, AshrCst; + Register Src; + // FIXME: detect splat constant vectors. + if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) + return false; + if (ShlCst != AshrCst) + return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) + return false; + MatchInfo = std::make_tuple(Src, ShlCst); + return true; +} +bool CombinerHelper::applyAshShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + Register Src; + int64_t ShiftAmt; + std::tie(Src, ShiftAmt) = MatchInfo; + unsigned Size = MRI.getType(Src).getScalarSizeInBits(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, + Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_AND %x, %y + // + // Eliminate the G_AND when it is known that x & y == x or x & y == y. + // + // Patterns like this can appear as a result of legalization. E.g. + // + // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y + // %one:_(s32) = G_CONSTANT i32 1 + // %and:_(s32) = G_AND %cmp, %one + // + // In this case, G_ICMP only produces a single bit, so x & 1 == x. + assert(MI.getOpcode() == TargetOpcode::G_AND); + if (!KB) + return false; + + Register AndDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(AndDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x & Mask == x. 
+ // x & 1 == x, always + // x & 0 == x, only if x is also 0 + // Meaning Mask has no effect if every bit is either one in Mask or zero in x. + // + // Check if we can replace AndDst with the LHS of the G_AND + if (canReplaceReg(AndDst, LHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace AndDst with the RHS of the G_AND + if (canReplaceReg(AndDst, RHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_OR %x, %y + // + // Eliminate the G_OR when it is known that x | y == x or x | y == y. + assert(MI.getOpcode() == TargetOpcode::G_OR); + if (!KB) + return false; + + Register OrDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(OrDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x | Mask == x. + // x | 0 == x, always + // x | 1 == x, only if x is also 1 + // Meaning Mask has no effect if every bit is either zero in Mask or one in x. + // + // Check if we can replace OrDst with the LHS of the G_OR + if (canReplaceReg(OrDst, LHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace OrDst with the RHS of the G_OR + if (canReplaceReg(OrDst, RHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { + // If the input is already sign extended, just drop the extension. + Register Src = MI.getOperand(1).getReg(); + unsigned ExtBits = MI.getOperand(2).getImm(); + unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); + return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); +} + +static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, + int64_t Cst, bool IsVector, bool IsFP) { + // For i1, Cst will always be -1 regardless of boolean contents. + return (ScalarSizeBits == 1 && Cst == -1) || + isConstTrueVal(TLI, Cst, IsVector, IsFP); +} + +bool CombinerHelper::matchNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + assert(MI.getOpcode() == TargetOpcode::G_XOR); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); + Register XorSrc; + Register CstReg; + // We match xor(src, true) here. + if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) + return false; + + if (!MRI.hasOneNonDBGUse(XorSrc)) + return false; + + // Check that XorSrc is the root of a tree of comparisons combined with ANDs + // and ORs. The suffix of RegsToNegate starting from index I is used a work + // list of tree nodes to visit. + RegsToNegate.push_back(XorSrc); + // Remember whether the comparisons are all integer or all floating point. 
+ bool IsInt = false; + bool IsFP = false; + for (unsigned I = 0; I < RegsToNegate.size(); ++I) { + Register Reg = RegsToNegate[I]; + if (!MRI.hasOneNonDBGUse(Reg)) + return false; + MachineInstr *Def = MRI.getVRegDef(Reg); + switch (Def->getOpcode()) { + default: + // Don't match if the tree contains anything other than ANDs, ORs and + // comparisons. + return false; + case TargetOpcode::G_ICMP: + if (IsFP) + return false; + IsInt = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_FCMP: + if (IsInt) + return false; + IsFP = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + // Implement De Morgan's laws: + // ~(x & y) -> ~x | ~y + // ~(x | y) -> ~x & ~y + // When we apply the combine we will change the opcode and recursively + // negate the operands. + RegsToNegate.push_back(Def->getOperand(1).getReg()); + RegsToNegate.push_back(Def->getOperand(2).getReg()); + break; + } + } + + // Now we know whether the comparisons are integer or floating point, check + // the constant in the xor. + int64_t Cst; + if (Ty.isVector()) { + MachineInstr *CstDef = MRI.getVRegDef(CstReg); + auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); + if (!MaybeCst) + return false; + if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) + return false; + } else { + if (!mi_match(CstReg, MRI, m_ICst(Cst))) + return false; + if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) + return false; + } + + return true; +} + +bool CombinerHelper::applyNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + for (Register Reg : RegsToNegate) { + MachineInstr *Def = MRI.getVRegDef(Reg); + Observer.changingInstr(*Def); + // For each comparison, invert the opcode. For each AND and OR, change the + // opcode. + switch (Def->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + MachineOperand &PredOp = Def->getOperand(1); + CmpInst::Predicate NewP = CmpInst::getInversePredicate( + (CmpInst::Predicate)PredOp.getPredicate()); + PredOp.setPredicate(NewP); + break; + } + case TargetOpcode::G_AND: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); + break; + case TargetOpcode::G_OR: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + break; + } + Observer.changedInstr(*Def); + } + + replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Match (xor (and x, y), y) (or any of its commuted cases) + assert(MI.getOpcode() == TargetOpcode::G_XOR); + Register &X = MatchInfo.first; + Register &Y = MatchInfo.second; + Register AndReg = MI.getOperand(1).getReg(); + Register SharedReg = MI.getOperand(2).getReg(); + + // Find a G_AND on either side of the G_XOR. + // Look for one of + // + // (xor (and x, y), SharedReg) + // (xor SharedReg, (and x, y)) + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { + std::swap(AndReg, SharedReg); + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) + return false; + } + + // Only do this if we'll eliminate the G_AND. + if (!MRI.hasOneNonDBGUse(AndReg)) + return false; + + // We can combine if SharedReg is the same as either the LHS or RHS of the + // G_AND. 
+ if (Y != SharedReg) + std::swap(X, Y); + return Y == SharedReg; +} + +bool CombinerHelper::applyXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Fold (xor (and x, y), y) -> (and (not x), y) + Builder.setInstrAndDebugLoc(MI); + Register X, Y; + std::tie(X, Y) = MatchInfo; + auto Not = Builder.buildNot(MRI.getType(X), X); + Observer.changingInstr(MI); + MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + MI.getOperand(1).setReg(Not->getOperand(0).getReg()); + MI.getOperand(2).setReg(Y); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + const DataLayout &DL = Builder.getMF().getDataLayout(); + + if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) + return false; + + if (Ty.isPointer()) { + auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); + return ConstVal && *ConstVal == 0; + } + + assert(Ty.isVector() && "Expecting a vector type"); + const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + return isBuildVectorAllZeros(*VecMI, MRI); +} + +bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); + Builder.setInstrAndDebugLoc(MI); + Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); + MI.eraseFromParent(); + return true; +} + +/// The second source operand is known to be a power of 2. +bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Pow2Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(DstReg); + Builder.setInstrAndDebugLoc(MI); + + // Fold (urem x, pow2) -> (and x, pow2-1) + auto NegOne = Builder.buildConstant(Ty, -1); + auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); + Builder.buildAnd(DstReg, Src0, Add); + MI.eraseFromParent(); + return true; +} + +Optional<SmallVector<Register, 8>> +CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { + assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); + // We want to detect if Root is part of a tree which represents a bunch + // of loads being merged into a larger load. We'll try to recognize patterns + // like, for example: + // + // Reg Reg + // \ / + // OR_1 Reg + // \ / + // OR_2 + // \ Reg + // .. / + // Root + // + // Reg Reg Reg Reg + // \ / \ / + // OR_1 OR_2 + // \ / + // \ / + // ... + // Root + // + // Each "Reg" may have been produced by a load + some arithmetic. This + // function will save each of them. + SmallVector<Register, 8> RegsToVisit; + SmallVector<const MachineInstr *, 7> Ors = {Root}; + + // In the "worst" case, we're dealing with a load for each byte. So, there + // are at most #bytes - 1 ORs. + const unsigned MaxIter = + MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; + for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { + if (Ors.empty()) + break; + const MachineInstr *Curr = Ors.pop_back_val(); + Register OrLHS = Curr->getOperand(1).getReg(); + Register OrRHS = Curr->getOperand(2).getReg(); + + // In the combine, we want to elimate the entire tree. + if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) + return None; + + // If it's a G_OR, save it and continue to walk. If it's not, then it's + // something that may be a load + arithmetic. 
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrLHS); + if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrRHS); + } + + // We're going to try and merge each register into a wider power-of-2 type, + // so we ought to have an even number of registers. + if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) + return None; + return RegsToVisit; +} + +/// Helper function for findLoadOffsetsForLoadOrCombine. +/// +/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, +/// and then moving that value into a specific byte offset. +/// +/// e.g. x[i] << 24 +/// +/// \returns The load instruction and the byte offset it is moved into. +static Optional<std::pair<MachineInstr *, int64_t>> +matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, + const MachineRegisterInfo &MRI) { + assert(MRI.hasOneNonDBGUse(Reg) && + "Expected Reg to only have one non-debug use?"); + Register MaybeLoad; + int64_t Shift; + if (!mi_match(Reg, MRI, + m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { + Shift = 0; + MaybeLoad = Reg; + } + + if (Shift % MemSizeInBits != 0) + return None; + + // TODO: Handle other types of loads. + auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); + if (!Load) + return None; + + const auto &MMO = **Load->memoperands_begin(); + if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) + return None; + + return std::make_pair(Load, Shift / MemSizeInBits); +} + +Optional<std::pair<MachineInstr *, int64_t>> +CombinerHelper::findLoadOffsetsForLoadOrCombine( + SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { + + // Each load found for the pattern. There should be one for each RegsToVisit. + SmallSetVector<const MachineInstr *, 8> Loads; + + // The lowest index used in any load. (The lowest "i" for each x[i].) + int64_t LowestIdx = INT64_MAX; + + // The load which uses the lowest index. + MachineInstr *LowestIdxLoad = nullptr; + + // Keeps track of the load indices we see. We shouldn't see any indices twice. + SmallSet<int64_t, 8> SeenIdx; + + // Ensure each load is in the same MBB. + // TODO: Support multiple MachineBasicBlocks. + MachineBasicBlock *MBB = nullptr; + const MachineMemOperand *MMO = nullptr; + + // Earliest instruction-order load in the pattern. + MachineInstr *EarliestLoad = nullptr; + + // Latest instruction-order load in the pattern. + MachineInstr *LatestLoad = nullptr; + + // Base pointer which every load should share. + Register BasePtr; + + // We want to find a load for each register. Each load should have some + // appropriate bit twiddling arithmetic. During this loop, we will also keep + // track of the load which uses the lowest index. Later, we will check if we + // can use its pointer in the final, combined load. + for (auto Reg : RegsToVisit) { + // Find the load, and find the position that it will end up in (e.g. a + // shifted) value. + auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); + if (!LoadAndPos) + return None; + MachineInstr *Load; + int64_t DstPos; + std::tie(Load, DstPos) = *LoadAndPos; + + // TODO: Handle multiple MachineBasicBlocks. Currently not handled because + // it is difficult to check for stores/calls/etc between loads. 
+ MachineBasicBlock *LoadMBB = Load->getParent(); + if (!MBB) + MBB = LoadMBB; + if (LoadMBB != MBB) + return None; + + // Make sure that the MachineMemOperands of every seen load are compatible. + const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); + if (!MMO) + MMO = LoadMMO; + if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) + return None; + + // Find out what the base pointer and index for the load is. + Register LoadPtr; + int64_t Idx; + if (!mi_match(Load->getOperand(1).getReg(), MRI, + m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { + LoadPtr = Load->getOperand(1).getReg(); + Idx = 0; + } + + // Don't combine things like a[i], a[i] -> a bigger load. + if (!SeenIdx.insert(Idx).second) + return None; + + // Every load must share the same base pointer; don't combine things like: + // + // a[i], b[i + 1] -> a bigger load. + if (!BasePtr.isValid()) + BasePtr = LoadPtr; + if (BasePtr != LoadPtr) + return None; + + if (Idx < LowestIdx) { + LowestIdx = Idx; + LowestIdxLoad = Load; + } + + // Keep track of the byte offset that this load ends up at. If we have seen + // the byte offset, then stop here. We do not want to combine: + // + // a[i] << 16, a[i + k] << 16 -> a bigger load. + if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) + return None; + Loads.insert(Load); + + // Keep track of the position of the earliest/latest loads in the pattern. + // We will check that there are no load fold barriers between them later + // on. + // + // FIXME: Is there a better way to check for load fold barriers? + if (!EarliestLoad || dominates(*Load, *EarliestLoad)) + EarliestLoad = Load; + if (!LatestLoad || dominates(*LatestLoad, *Load)) + LatestLoad = Load; + } + + // We found a load for each register. Let's check if each load satisfies the + // pattern. + assert(Loads.size() == RegsToVisit.size() && + "Expected to find a load for each register?"); + assert(EarliestLoad != LatestLoad && EarliestLoad && + LatestLoad && "Expected at least two loads?"); + + // Check if there are any stores, calls, etc. between any of the loads. If + // there are, then we can't safely perform the combine. + // + // MaxIter is chosen based off the (worst case) number of iterations it + // typically takes to succeed in the LLVM test suite plus some padding. + // + // FIXME: Is there a better way to check for load fold barriers? + const unsigned MaxIter = 20; + unsigned Iter = 0; + for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), + LatestLoad->getIterator())) { + if (Loads.count(&MI)) + continue; + if (MI.isLoadFoldBarrier()) + return None; + if (Iter++ == MaxIter) + return None; + } + + return std::make_pair(LowestIdxLoad, LowestIdx); +} + +bool CombinerHelper::matchLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + MachineFunction &MF = *MI.getMF(); + // Assuming a little-endian target, transform: + // s8 *a = ... + // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) + // => + // s32 val = *((i32)a) + // + // s8 *a = ... + // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] + // => + // s32 val = BSWAP(*((s32)a)) + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (Ty.isVector()) + return false; + + // We need to combine at least two loads into this type. Since the smallest + // possible load is into a byte, we need at least a 16-bit wide type. 
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits(); + if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) + return false; + + // Match a collection of non-OR instructions in the pattern. + auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); + if (!RegsToVisit) + return false; + + // We have a collection of non-OR instructions. Figure out how wide each of + // the small loads should be based off of the number of potential loads we + // found. + const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); + if (NarrowMemSizeInBits % 8 != 0) + return false; + + // Check if each register feeding into each OR is a load from the same + // base pointer + some arithmetic. + // + // e.g. a[0], a[1] << 8, a[2] << 16, etc. + // + // Also verify that each of these ends up putting a[i] into the same memory + // offset as a load into a wide type would. + SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; + MachineInstr *LowestIdxLoad; + int64_t LowestIdx; + auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( + MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); + if (!MaybeLoadInfo) + return false; + std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; + + // We have a bunch of loads being OR'd together. Using the addresses + offsets + // we found before, check if this corresponds to a big or little endian byte + // pattern. If it does, then we can represent it using a load + possibly a + // BSWAP. + bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); + Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); + if (!IsBigEndian.hasValue()) + return false; + bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; + if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) + return false; + + // Make sure that the load from the lowest index produces offset 0 in the + // final value. + // + // This ensures that we won't combine something like this: + // + // load x[i] -> byte 2 + // load x[i+1] -> byte 0 ---> wide_load x[i] + // load x[i+2] -> byte 1 + const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; + const unsigned ZeroByteOffset = + *IsBigEndian + ? bigEndianByteAt(NumLoadsInTy, 0) + : littleEndianByteAt(NumLoadsInTy, 0); + auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); + if (ZeroOffsetIdx == MemOffset2Idx.end() || + ZeroOffsetIdx->second != LowestIdx) + return false; + + // We wil reuse the pointer from the load which ends up at byte offset 0. It + // may not use index 0. + Register Ptr = LowestIdxLoad->getOperand(1).getReg(); + const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); + LegalityQuery::MemDesc MMDesc; + MMDesc.SizeInBits = WideMemSizeInBits; + MMDesc.AlignInBits = MMO.getAlign().value() * 8; + MMDesc.Ordering = MMO.getOrdering(); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) + return false; + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); + + // Load must be allowed and fast on the target. + LLVMContext &C = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + bool Fast = false; + if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || + !Fast) + return false; + + MatchInfo = [=](MachineIRBuilder &MIB) { + Register LoadDst = NeedsBSwap ? 
MRI.cloneVirtualRegister(Dst) : Dst; + MIB.buildLoad(LoadDst, Ptr, *NewMMO); + if (NeedsBSwap) + MIB.buildBSwap(Dst, LoadDst); + }; + return true; +} + +bool CombinerHelper::applyLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + MatchInfo(Builder); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp index 6bc72e4aa9..59f4d60a41 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp @@ -16,7 +16,7 @@ using namespace llvm; void GISelChangeObserver::changingAllUsesOfReg( - const MachineRegisterInfo &MRI, Register Reg) { + const MachineRegisterInfo &MRI, Register Reg) { for (auto &ChangingMI : MRI.use_instructions(Reg)) { changingInstr(ChangingMI); ChangingAllUsesOfReg.insert(&ChangingMI); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index e38ede1b67..2de20489e1 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -94,25 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { << "\n"; } -/// Compute known bits for the intersection of \p Src0 and \p Src1 -void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, - KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth) { - // Test src1 first, since we canonicalize simpler expressions to the RHS. - computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); - - // If we don't know any bits, early out. - if (Known.isUnknown()) - return; - - KnownBits Known2; - computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); - - // Only known if known in both the LHS and RHS. - Known = KnownBits::commonBits(Known, Known2); -} - +/// Compute known bits for the intersection of \p Src0 and \p Src1 +void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, + KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. + computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); + + // If we don't know any bits, early out. + if (Known.isUnknown()) + return; + + KnownBits Known2; + computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -200,7 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, Depth + (Opcode != TargetOpcode::COPY)); - Known = KnownBits::commonBits(Known, Known2); + Known = KnownBits::commonBits(Known, Known2); // If we reach a point where we don't know anything // just stop looking through the operands. 
if (Known.One == 0 && Known.Zero == 0) @@ -217,7 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, auto CstVal = getConstantVRegVal(R, MRI); if (!CstVal) break; - Known = KnownBits::makeConstant(*CstVal); + Known = KnownBits::makeConstant(*CstVal); break; } case TargetOpcode::G_FRAME_INDEX: { @@ -284,52 +284,52 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - Known = KnownBits::computeForMul(Known, Known2); + Known = KnownBits::computeForMul(Known, Known2); break; } case TargetOpcode::G_SELECT: { - computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), - Known, DemandedElts, Depth + 1); - break; - } - case TargetOpcode::G_SMIN: { - // TODO: Handle clamp pattern with number of sign bits - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), + Known, DemandedElts, Depth + 1); + break; + } + case TargetOpcode::G_SMIN: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - Known = KnownBits::smin(Known, KnownRHS); + Known = KnownBits::smin(Known, KnownRHS); + break; + } + case TargetOpcode::G_SMAX: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smax(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMIN: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umin(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMAX: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umax(Known, KnownRHS); break; } - case TargetOpcode::G_SMAX: { - // TODO: Handle clamp pattern with number of sign bits - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, - Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, - Depth + 1); - Known = KnownBits::smax(Known, KnownRHS); - break; - } - case TargetOpcode::G_UMIN: { - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, - DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, - DemandedElts, Depth + 1); - Known = KnownBits::umin(Known, KnownRHS); - break; - } - case TargetOpcode::G_UMAX: { - KnownBits KnownRHS; - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, - DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, - DemandedElts, Depth + 1); - Known = KnownBits::umax(Known, KnownRHS); - break; - } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { if (TL.getBooleanContents(DstTy.isVector(), @@ -347,58 +347,58 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known = 
Known.sext(BitWidth); break; } - case TargetOpcode::G_SEXT_INREG: { - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, - Depth + 1); - Known = Known.sextInReg(MI.getOperand(2).getImm()); - break; - } + case TargetOpcode::G_SEXT_INREG: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.sextInReg(MI.getOperand(2).getImm()); + break; + } case TargetOpcode::G_ANYEXT: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - Known = Known.anyext(BitWidth); + Known = Known.anyext(BitWidth); break; } case TargetOpcode::G_LOAD: { - const MachineMemOperand *MMO = *MI.memoperands_begin(); - if (const MDNode *Ranges = MMO->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, Known); + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); } - + break; } case TargetOpcode::G_ZEXTLOAD: { // Everything above the retrieved bits is zero - Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); break; } - case TargetOpcode::G_ASHR: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, - Depth + 1); + case TargetOpcode::G_ASHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, Depth + 1); - Known = KnownBits::ashr(LHSKnown, RHSKnown); - break; - } - case TargetOpcode::G_LSHR: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Known = KnownBits::ashr(LHSKnown, RHSKnown); + break; + } + case TargetOpcode::G_LSHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + Known = KnownBits::lshr(LHSKnown, RHSKnown); + break; + } + case TargetOpcode::G_SHL: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, - Depth + 1); - Known = KnownBits::lshr(LHSKnown, RHSKnown); + Known = KnownBits::shl(LHSKnown, RHSKnown); break; } - case TargetOpcode::G_SHL: { - KnownBits LHSKnown, RHSKnown; - computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, - Depth + 1); - computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, - Depth + 1); - Known = KnownBits::shl(LHSKnown, RHSKnown); - break; - } case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_PTRTOINT: // Fall through and handle them the same as zext/trunc. 
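[Editor's aside, not part of the diff.] The restored hunks above route the shift, extension and min/max opcodes to the shared KnownBits helpers. As a minimal standalone sketch of how those helpers compose -- using only APIs that already appear in this diff (KnownBits::makeConstant, KnownBits::shl, KnownBits::commonBits, APInt::setBitsFrom, hasConflict) and made-up bit widths and values:

// Illustrative sketch only; assumes LLVM headers are on the include path.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // G_SHL with fully known operands: the constant 0x0F shifted left by 8.
  KnownBits Val = KnownBits::makeConstant(APInt(32, 0x0F));
  KnownBits Amt = KnownBits::makeConstant(APInt(32, 8));
  KnownBits Shifted = KnownBits::shl(Val, Amt);

  // G_ZEXTLOAD of a 16-bit value into 32 bits: only the high half is known,
  // and it is known to be zero (mirrors Known.Zero.setBitsFrom above).
  KnownBits Load(32);
  Load.Zero.setBitsFrom(16);

  // Merging two candidate values (as the commonBits calls above do for
  // COPY/PHI and G_SELECT) keeps only the bits both sides agree on.
  KnownBits Merged = KnownBits::commonBits(Shifted, Load);
  assert(!Merged.hasConflict() && "no bit may be known both 0 and 1");
  (void)Merged;
  return 0;
}

Nothing here adds functionality; it only restates, in isolation, what the opcode cases above already compute per instruction.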
@@ -418,50 +418,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(SrcBitWidth); break; } - case TargetOpcode::G_MERGE_VALUES: { - unsigned NumOps = MI.getNumOperands(); - unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - - for (unsigned I = 0; I != NumOps - 1; ++I) { - KnownBits SrcOpKnown; - computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, - DemandedElts, Depth + 1); - Known.insertBits(SrcOpKnown, I * OpSize); - } - break; - } - case TargetOpcode::G_UNMERGE_VALUES: { - unsigned NumOps = MI.getNumOperands(); - Register SrcReg = MI.getOperand(NumOps - 1).getReg(); - if (MRI.getType(SrcReg).isVector()) - return; // TODO: Handle vectors. - - KnownBits SrcOpKnown; - computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1); - - // Figure out the result operand index - unsigned DstIdx = 0; - for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; - ++DstIdx) - ; - - Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); - break; - } - case TargetOpcode::G_BSWAP: { - Register SrcReg = MI.getOperand(1).getReg(); - computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.byteSwap(); - break; - } - case TargetOpcode::G_BITREVERSE: { - Register SrcReg = MI.getOperand(1).getReg(); - computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.reverseBits(); - break; - } - } - + case TargetOpcode::G_MERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + + for (unsigned I = 0; I != NumOps - 1; ++I) { + KnownBits SrcOpKnown; + computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, + DemandedElts, Depth + 1); + Known.insertBits(SrcOpKnown, I * OpSize); + } + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + Register SrcReg = MI.getOperand(NumOps - 1).getReg(); + if (MRI.getType(SrcReg).isVector()) + return; // TODO: Handle vectors. + + KnownBits SrcOpKnown; + computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1); + + // Figure out the result operand index + unsigned DstIdx = 0; + for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; + ++DstIdx) + ; + + Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); + break; + } + case TargetOpcode::G_BSWAP: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.byteSwap(); + break; + } + case TargetOpcode::G_BITREVERSE: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.reverseBits(); + break; + } + } + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); LLVM_DEBUG(dumpResult(MI, Known, Depth)); @@ -469,17 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, ComputeKnownBitsCache[R] = Known; } -/// Compute number of sign bits for the intersection of \p Src0 and \p Src1 -unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, - const APInt &DemandedElts, - unsigned Depth) { - // Test src1 first, since we canonicalize simpler expressions to the RHS. 
- unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); - if (Src1SignBits == 1) - return 1; - return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); -} - +/// Compute number of sign bits for the intersection of \p Src0 and \p Src1 +unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. + unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); + if (Src1SignBits == 1) + return 1; + return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); +} + unsigned GISelKnownBits::computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth) { @@ -523,31 +523,31 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } - case TargetOpcode::G_SEXT_INREG: { - // Max of the input and what this extends. - Register Src = MI.getOperand(1).getReg(); - unsigned SrcBits = MI.getOperand(2).getImm(); - unsigned InRegBits = TyBits - SrcBits + 1; - return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); - } + case TargetOpcode::G_SEXT_INREG: { + // Max of the input and what this extends. + Register Src = MI.getOperand(1).getReg(); + unsigned SrcBits = MI.getOperand(2).getImm(); + unsigned InRegBits = TyBits - SrcBits + 1; + return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); + } case TargetOpcode::G_SEXTLOAD: { - // FIXME: We need an in-memory type representation. - if (DstTy.isVector()) - return 1; - - // e.g. i16->i32 = '17' bits known. - const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits() + 1; - } - case TargetOpcode::G_ZEXTLOAD: { - // FIXME: We need an in-memory type representation. - if (DstTy.isVector()) - return 1; - - // e.g. i16->i32 = '16' bits known. - const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits(); - } + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '17' bits known. + const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits() + 1; + } + case TargetOpcode::G_ZEXTLOAD: { + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '16' bits known. 
+ const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits(); + } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src); @@ -560,11 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, return NumSrcSignBits - (NumSrcBits - DstTyBits); break; } - case TargetOpcode::G_SELECT: { - return computeNumSignBitsMin(MI.getOperand(2).getReg(), - MI.getOperand(3).getReg(), DemandedElts, - Depth + 1); - } + case TargetOpcode::G_SELECT: { + return computeNumSignBitsMin(MI.getOperand(2).getReg(), + MI.getOperand(3).getReg(), DemandedElts, + Depth + 1); + } case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: default: { diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp index c81add2e6b..b7883cbc31 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -29,11 +29,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -50,13 +50,13 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -74,7 +74,7 @@ #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> -#include <cstddef> +#include <cstddef> #include <cstdint> #include <iterator> #include <string> @@ -95,8 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) @@ -117,8 +117,8 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator(CodeGenOpt::Level optlevel) - : MachineFunctionPass(ID), OptLevel(optlevel) {} +IRTranslator::IRTranslator(CodeGenOpt::Level optlevel) + : MachineFunctionPass(ID), OptLevel(optlevel) {} #ifndef NDEBUG namespace { @@ -162,17 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); - if (OptLevel != CodeGenOpt::None) - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); 
getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } IRTranslator::ValueToVRegInfo::VRegListT & IRTranslator::allocateVRegs(const Value &Val) { - auto VRegsIt = VMap.findVRegs(Val); - if (VRegsIt != VMap.vregs_end()) - return *VRegsIt->second; + auto VRegsIt = VMap.findVRegs(Val); + if (VRegsIt != VMap.vregs_end()) + return *VRegsIt->second; auto *Regs = VMap.getVRegs(Val); auto *Offsets = VMap.getOffsets(Val); SmallVector<LLT, 4> SplitTys; @@ -234,9 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) { } int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { - auto MapEntry = FrameIndices.find(&AI); - if (MapEntry != FrameIndices.end()) - return MapEntry->second; + auto MapEntry = FrameIndices.find(&AI); + if (MapEntry != FrameIndices.end()) + return MapEntry->second; uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); uint64_t Size = @@ -306,8 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, return true; } -bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, - MachineIRBuilder &MIRBuilder) { +bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); uint16_t Flags = 0; @@ -315,14 +315,14 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); } - MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags); + MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags); return true; } -bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { - return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder); -} - +bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { + return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder); +} + bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { auto *CI = dyn_cast<CmpInst>(&U); @@ -368,289 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); -} - -void IRTranslator::emitBranchForMergedCondition( - const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - BranchProbability TProb, BranchProbability FProb, bool InvertCond) { - // If the leaf of the tree is a comparison, merge the condition into - // the caseblock. - if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { - CmpInst::Predicate Condition; - if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { - Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); - } else { - const FCmpInst *FC = cast<FCmpInst>(Cond); - Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate(); - } - - SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0), - BOp->getOperand(1), nullptr, TBB, FBB, CurBB, - CurBuilder->getDebugLoc(), TProb, FProb); - SL->SwitchCases.push_back(CB); - return; - } - - // Create a CaseBlock record representing this branch. - CmpInst::Predicate Pred = InvertCond ? 
CmpInst::ICMP_NE : CmpInst::ICMP_EQ; - SwitchCG::CaseBlock CB( - Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()), - nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb); - SL->SwitchCases.push_back(CB); -} - -static bool isValInBlock(const Value *V, const BasicBlock *BB) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getParent() == BB; - return true; -} - -void IRTranslator::findMergedConditions( - const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - Instruction::BinaryOps Opc, BranchProbability TProb, - BranchProbability FProb, bool InvertCond) { - using namespace PatternMatch; - assert((Opc == Instruction::And || Opc == Instruction::Or) && - "Expected Opc to be AND/OR"); - // Skip over not part of the tree and remember to invert op and operands at - // next level. - Value *NotCond; - if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && - isValInBlock(NotCond, CurBB->getBasicBlock())) { - findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, - !InvertCond); - return; - } - - const Instruction *BOp = dyn_cast<Instruction>(Cond); - const Value *BOpOp0, *BOpOp1; - // Compute the effective opcode for Cond, taking into account whether it needs - // to be inverted, e.g. - // and (not (or A, B)), C - // gets lowered as - // and (and (not A, not B), C) - Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; - if (BOp) { - BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) - ? Instruction::And - : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) - ? Instruction::Or - : (Instruction::BinaryOps)0); - if (InvertCond) { - if (BOpc == Instruction::And) - BOpc = Instruction::Or; - else if (BOpc == Instruction::Or) - BOpc = Instruction::And; - } - } - - // If this node is not part of the or/and tree, emit it as a branch. - // Note that all nodes in the tree should have same opcode. - bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); - if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || - !isValInBlock(BOpOp0, CurBB->getBasicBlock()) || - !isValInBlock(BOpOp1, CurBB->getBasicBlock())) { - emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, - InvertCond); - return; - } - - // Create TmpBB after CurBB. - MachineFunction::iterator BBI(CurBB); - MachineBasicBlock *TmpBB = - MF->CreateMachineBasicBlock(CurBB->getBasicBlock()); - CurBB->getParent()->insert(++BBI, TmpBB); - - if (Opc == Instruction::Or) { - // Codegen X | Y as: - // BB1: - // jmp_if_X TBB - // jmp TmpBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // - - // We have flexibility in setting Prob for BB1 and Prob for TmpBB. - // The requirement is that - // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) - // = TrueProb for original BB. - // Assuming the original probabilities are A and B, one choice is to set - // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to - // A/(1+B) and 2B/(1+B). This choice assumes that - // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. - // Another choice is to assume TrueProb for BB1 equals to TrueProb for - // TmpBB, but the math is more complicated. - - auto NewTrueProb = TProb / 2; - auto NewFalseProb = TProb / 2 + FProb; - // Emit the LHS condition. - findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, - NewFalseProb, InvertCond); - - // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). 
- SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; - BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); - // Emit the RHS condition into TmpBB. - findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], - Probs[1], InvertCond); - } else { - assert(Opc == Instruction::And && "Unknown merge op!"); - // Codegen X & Y as: - // BB1: - // jmp_if_X TmpBB - // jmp FBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // - // This requires creation of TmpBB after CurBB. - - // We have flexibility in setting Prob for BB1 and Prob for TmpBB. - // The requirement is that - // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) - // = FalseProb for original BB. - // Assuming the original probabilities are A and B, one choice is to set - // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to - // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == - // TrueProb for BB1 * FalseProb for TmpBB. - - auto NewTrueProb = TProb + FProb / 2; - auto NewFalseProb = FProb / 2; - // Emit the LHS condition. - findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, - NewFalseProb, InvertCond); - - // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). - SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; - BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); - // Emit the RHS condition into TmpBB. - findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], - Probs[1], InvertCond); - } -} - -bool IRTranslator::shouldEmitAsBranches( - const std::vector<SwitchCG::CaseBlock> &Cases) { - // For multiple cases, it's better to emit as branches. - if (Cases.size() != 2) - return true; - - // If this is two comparisons of the same values or'd or and'd together, they - // will get folded into a single comparison, so don't emit two blocks. - if ((Cases[0].CmpLHS == Cases[1].CmpLHS && - Cases[0].CmpRHS == Cases[1].CmpRHS) || - (Cases[0].CmpRHS == Cases[1].CmpLHS && - Cases[0].CmpLHS == Cases[1].CmpRHS)) { - return false; - } - - // Handle: (X != null) | (Y != null) --> (X|Y) != 0 - // Handle: (X == null) & (Y == null) --> (X|Y) == 0 - if (Cases[0].CmpRHS == Cases[1].CmpRHS && - Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred && - isa<Constant>(Cases[0].CmpRHS) && - cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { - if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ && - Cases[0].TrueBB == Cases[1].ThisBB) - return false; - if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE && - Cases[0].FalseBB == Cases[1].ThisBB) - return false; - } - - return true; -} - + return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); +} + +void IRTranslator::emitBranchForMergedCondition( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + BranchProbability TProb, BranchProbability FProb, bool InvertCond) { + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + CmpInst::Predicate Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); + Condition = InvertCond ? 
FC->getInversePredicate() : FC->getPredicate(); + } + + SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0), + BOp->getOperand(1), nullptr, TBB, FBB, CurBB, + CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); + return; + } + + // Create a CaseBlock record representing this branch. + CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + SwitchCG::CaseBlock CB( + Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); +} + +static bool isValInBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +void IRTranslator::findMergedConditions( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond) { + using namespace PatternMatch; + assert((Opc == Instruction::And || Opc == Instruction::Or) && + "Expected Opc to be AND/OR"); + // Skip over not part of the tree and remember to invert op and operands at + // next level. + Value *NotCond; + if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && + isValInBlock(NotCond, CurBB->getBasicBlock())) { + findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + + const Instruction *BOp = dyn_cast<Instruction>(Cond); + const Value *BOpOp0, *BOpOp1; + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g. + // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; + if (BOp) { + BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::And + : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::Or + : (Instruction::BinaryOps)0); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. + // Note that all nodes in the tree should have same opcode. + bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); + if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || + !isValInBlock(BOpOp0, CurBB->getBasicBlock()) || + !isValInBlock(BOpOp1, CurBB->getBasicBlock())) { + emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, + InvertCond); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI(CurBB); + MachineBasicBlock *TmpBB = + MF->CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } +} + +bool IRTranslator::shouldEmitAsBranches( + const std::vector<SwitchCG::CaseBlock> &Cases) { + // For multiple cases, it's better to emit as branches. + if (Cases.size() != 2) + return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ && + Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE && + Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { const BranchInst &BrInst = cast<BranchInst>(U); - auto &CurMBB = MIRBuilder.getMBB(); - auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0)); - - if (BrInst.isUnconditional()) { - // If the unconditional target is the layout successor, fallthrough. - if (!CurMBB.isLayoutSuccessor(Succ0MBB)) - MIRBuilder.buildBr(*Succ0MBB); - - // Link successors. 
- for (const BasicBlock *Succ : successors(&BrInst)) - CurMBB.addSuccessor(&getMBB(*Succ)); - return true; - } - - // If this condition is one of the special cases we handle, do special stuff - // now. - const Value *CondVal = BrInst.getCondition(); - MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); - - const auto &TLI = *MF->getSubtarget().getTargetLowering(); - - // If this is a series of conditions that are or'd or and'd together, emit - // this as a sequence of branches instead of setcc's with and/or operations. - // As long as jumps are not expensive (exceptions for multi-use logic ops, - // unpredictable branches, and vector extracts because those jumps are likely - // expensive for any target), this should improve performance. - // For example, instead of something like: - // cmp A, B - // C = seteq - // cmp D, E - // F = setle - // or C, F - // jnz foo - // Emit: - // cmp A, B - // je foo - // cmp D, E - // jle foo - using namespace PatternMatch; - const Instruction *CondI = dyn_cast<Instruction>(CondVal); - if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && - !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { - Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; - Value *Vec; - const Value *BOp0, *BOp1; - if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) - Opcode = Instruction::And; - else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) - Opcode = Instruction::Or; - - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { - findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode, - getEdgeProbability(&CurMBB, Succ0MBB), - getEdgeProbability(&CurMBB, Succ1MBB), - /*InvertCond=*/false); - assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!"); - - // Allow some cases to be rejected. - if (shouldEmitAsBranches(SL->SwitchCases)) { - // Emit the branch for this block. - emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder); - SL->SwitchCases.erase(SL->SwitchCases.begin()); - return true; - } - - // Okay, we decided not to do this, remove any inserted MBB's and clear - // SwitchCases. - for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I) - MF->erase(SL->SwitchCases[I].ThisBB); - - SL->SwitchCases.clear(); - } - } - - // Create a CaseBlock record representing this branch. - SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal, - ConstantInt::getTrue(MF->getFunction().getContext()), - nullptr, Succ0MBB, Succ1MBB, &CurMBB, - CurBuilder->getDebugLoc()); - - // Use emitSwitchCase to actually insert the fast branch sequence for this - // cond branch. - emitSwitchCase(CB, &CurMBB, *CurBuilder); + auto &CurMBB = MIRBuilder.getMBB(); + auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0)); + + if (BrInst.isUnconditional()) { + // If the unconditional target is the layout successor, fallthrough. + if (!CurMBB.isLayoutSuccessor(Succ0MBB)) + MIRBuilder.buildBr(*Succ0MBB); + + // Link successors. + for (const BasicBlock *Succ : successors(&BrInst)) + CurMBB.addSuccessor(&getMBB(*Succ)); + return true; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = BrInst.getCondition(); + MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); + + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. 
+ // As long as jumps are not expensive (exceptions for multi-use logic ops, + // unpredictable branches, and vector extracts because those jumps are likely + // expensive for any target), this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + using namespace PatternMatch; + const Instruction *CondI = dyn_cast<Instruction>(CondVal); + if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && + !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; + Value *Vec; + const Value *BOp0, *BOp1; + if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::And; + else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::Or; + + if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode, + getEdgeProbability(&CurMBB, Succ0MBB), + getEdgeProbability(&CurMBB, Succ1MBB), + /*InvertCond=*/false); + assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (shouldEmitAsBranches(SL->SwitchCases)) { + // Emit the branch for this block. + emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder); + SL->SwitchCases.erase(SL->SwitchCases.begin()); + return true; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I) + MF->erase(SL->SwitchCases[I].ThisBB); + + SL->SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal, + ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, Succ0MBB, Succ1MBB, &CurMBB, + CurBuilder->getDebugLoc()); + + // Use emitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + emitSwitchCase(CB, &CurMBB, *CurBuilder); return true; } @@ -715,7 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { } SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); - SL->findBitTestClusters(Clusters, &SI); + SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -836,22 +836,22 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, const LLT i1Ty = LLT::scalar(1); // Build the compare. if (!CB.CmpMHS) { - const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); - // For conditional branch lowering, we might try to do something silly like - // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, - // just re-use the existing condition vreg. - if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && - CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { - Cond = CondLHS; - } else { - Register CondRHS = getOrCreateVReg(*CB.CmpRHS); - if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) - Cond = - MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); - else - Cond = - MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); - } + const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); + // For conditional branch lowering, we might try to do something silly like + // emit an G_ICMP to compare an existing G_ICMP i1 result with true. 
If so, + // just re-use the existing condition vreg. + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + Cond = CondLHS; + } else { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) + Cond = + MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + else + Cond = + MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + } } else { assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && "Can only handle SLE ranges"); @@ -884,8 +884,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb); CB.ThisBB->normalizeSuccProbs(); - addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, - CB.ThisBB); + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, + CB.ThisBB); MIB.buildBrCond(Cond, *CB.TrueBB); MIB.buildBr(*CB.FalseBB); @@ -998,156 +998,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, return true; } -void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, - MachineBasicBlock *SwitchBB) { - MachineIRBuilder &MIB = *CurBuilder; - MIB.setMBB(*SwitchBB); - - // Subtract the minimum value. - Register SwitchOpReg = getOrCreateVReg(*B.SValue); - - LLT SwitchOpTy = MRI->getType(SwitchOpReg); - Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); - auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); - - // Ensure that the type will fit the mask value. - LLT MaskTy = SwitchOpTy; - for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { - if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { - // Switch table case range are encoded into series of masks. - // Just use pointer type, it's guaranteed to fit. - MaskTy = LLT::scalar(64); - break; - } - } - Register SubReg = RangeSub.getReg(0); - if (SwitchOpTy != MaskTy) - SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0); - - B.RegVT = getMVTForLLT(MaskTy); - B.Reg = SubReg; - - MachineBasicBlock *MBB = B.Cases[0].ThisBB; - - if (!B.OmitRangeCheck) - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); - addSuccessorWithProb(SwitchBB, MBB, B.Prob); - - SwitchBB->normalizeSuccProbs(); - - if (!B.OmitRangeCheck) { - // Conditional branch to the default block. - auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); - auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), - RangeSub, RangeCst); - MIB.buildBrCond(RangeCmp, *B.Default); - } - - // Avoid emitting unnecessary branches to the next block. - if (MBB != SwitchBB->getNextNode()) - MIB.buildBr(*MBB); -} - -void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, - MachineBasicBlock *NextMBB, - BranchProbability BranchProbToNext, - Register Reg, SwitchCG::BitTestCase &B, - MachineBasicBlock *SwitchBB) { - MachineIRBuilder &MIB = *CurBuilder; - MIB.setMBB(*SwitchBB); - - LLT SwitchTy = getLLTForMVT(BB.RegVT); - Register Cmp; - unsigned PopCount = countPopulation(B.Mask); - if (PopCount == 1) { - // Testing for a single bit; just compare the shift count with what it - // would need to be to shift a 1 bit in that position. - auto MaskTrailingZeros = - MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); - Cmp = - MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) - .getReg(0); - } else if (PopCount == BB.Range) { - // There is only one zero bit in the range, test for it directly. 
- auto MaskTrailingOnes = - MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) - .getReg(0); - } else { - // Make desired shift. - auto CstOne = MIB.buildConstant(SwitchTy, 1); - auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg); - - // Emit bit tests and jumps. - auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); - auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); - auto CstZero = MIB.buildConstant(SwitchTy, 0); - Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) - .getReg(0); - } - - // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. - addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); - // The branch probability from SwitchBB to NextMBB is BranchProbToNext. - addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); - // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is - // one as they are relative probabilities (and thus work more like weights), - // and hence we need to normalize them to let the sum of them become one. - SwitchBB->normalizeSuccProbs(); - - // Record the fact that the IR edge from the header to the bit test target - // will go through our new block. Neeeded for PHIs to have nodes added. - addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()}, - SwitchBB); - - MIB.buildBrCond(Cmp, *B.TargetBB); - - // Avoid emitting unnecessary branches to the next block. - if (NextMBB != SwitchBB->getNextNode()) - MIB.buildBr(*NextMBB); -} - -bool IRTranslator::lowerBitTestWorkItem( - SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, - MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, - MachineIRBuilder &MIB, MachineFunction::iterator BBI, - BranchProbability DefaultProb, BranchProbability UnhandledProbs, - SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable) { - using namespace SwitchCG; - MachineFunction *CurMF = SwitchMBB->getParent(); - // FIXME: Optimize away range check based on pivot comparisons. - BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; - // The bit test blocks haven't been inserted yet; insert them here. - for (BitTestCase &BTC : BTB->Cases) - CurMF->insert(BBI, BTC.ThisBB); - - // Fill in fields of the BitTestBlock. - BTB->Parent = CurMBB; - BTB->Default = Fallthrough; - - BTB->DefaultProb = UnhandledProbs; - // If the cases in bit test don't form a contiguous range, we evenly - // distribute the probability on the edge to Fallthrough to two - // successors of CurMBB. - if (!BTB->ContiguousRange) { - BTB->Prob += DefaultProb / 2; - BTB->DefaultProb -= DefaultProb / 2; - } - - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - BTB->OmitRangeCheck = true; - } - - // If we're in the right place, emit the bit test header right now. - if (CurMBB == SwitchMBB) { - emitBitTestHeader(*BTB, SwitchMBB); - BTB->Emitted = true; - } - return true; -} - +void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + // Subtract the minimum value. + Register SwitchOpReg = getOrCreateVReg(*B.SValue); + + LLT SwitchOpTy = MRI->getType(SwitchOpReg); + Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); + auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); + + // Ensure that the type will fit the mask value. 
+ LLT MaskTy = SwitchOpTy; + for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { + if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + MaskTy = LLT::scalar(64); + break; + } + } + Register SubReg = RangeSub.getReg(0); + if (SwitchOpTy != MaskTy) + SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0); + + B.RegVT = getMVTForLLT(MaskTy); + B.Reg = SubReg; + + MachineBasicBlock *MBB = B.Cases[0].ThisBB; + + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + + SwitchBB->normalizeSuccProbs(); + + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. + auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); + auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), + RangeSub, RangeCst); + MIB.buildBrCond(RangeCmp, *B.Default); + } + + // Avoid emitting unnecessary branches to the next block. + if (MBB != SwitchBB->getNextNode()) + MIB.buildBr(*MBB); +} + +void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, + MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, + Register Reg, SwitchCG::BitTestCase &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + LLT SwitchTy = getLLTForMVT(BB.RegVT); + Register Cmp; + unsigned PopCount = countPopulation(B.Mask); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + auto MaskTrailingZeros = + MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); + Cmp = + MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) + .getReg(0); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + auto MaskTrailingOnes = + MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) + .getReg(0); + } else { + // Make desired shift. + auto CstOne = MIB.buildConstant(SwitchTy, 1); + auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg); + + // Emit bit tests and jumps. + auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); + auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); + auto CstZero = MIB.buildConstant(SwitchTy, 0); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) + .getReg(0); + } + + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); + + // Record the fact that the IR edge from the header to the bit test target + // will go through our new block. Neeeded for PHIs to have nodes added. + addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()}, + SwitchBB); + + MIB.buildBrCond(Cmp, *B.TargetBB); + + // Avoid emitting unnecessary branches to the next block. 
+ if (NextMBB != SwitchBB->getNextNode()) + MIB.buildBr(*NextMBB); +} + +bool IRTranslator::lowerBitTestWorkItem( + SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, MachineFunction::iterator BBI, + BranchProbability DefaultProb, BranchProbability UnhandledProbs, + SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable) { + using namespace SwitchCG; + MachineFunction *CurMF = SwitchMBB->getParent(); + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { + emitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + return true; +} + bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, @@ -1208,15 +1208,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, switch (I->Kind) { case CC_BitTests: { - if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, - DefaultProb, UnhandledProbs, I, Fallthrough, - FallthroughUnreachable)) { - LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch"); - return false; - } - break; + if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, + DefaultProb, UnhandledProbs, I, Fallthrough, + FallthroughUnreachable)) { + LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch"); + return false; + } + break; } - + case CC_JumpTable: { if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, UnhandledProbs, I, Fallthrough, @@ -1557,34 +1557,34 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned Opcode) { + unsigned Opcode) { // If the source is undef, then just emit a nop. if (isa<UndefValue>(CI.getArgOperand(1))) return true; - SmallVector<Register, 3> SrcRegs; - - unsigned MinPtrSize = UINT_MAX; - for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) { - Register SrcReg = getOrCreateVReg(**AI); - LLT SrcTy = MRI->getType(SrcReg); - if (SrcTy.isPointer()) - MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); - SrcRegs.push_back(SrcReg); - } - - LLT SizeTy = LLT::scalar(MinPtrSize); - - // The size operand should be the minimum of the pointer sizes. 
- Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; - if (MRI->getType(SizeOpReg) != SizeTy) - SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0); - - auto ICall = MIRBuilder.buildInstr(Opcode); - for (Register SrcReg : SrcRegs) - ICall.addUse(SrcReg); - + SmallVector<Register, 3> SrcRegs; + + unsigned MinPtrSize = UINT_MAX; + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) { + Register SrcReg = getOrCreateVReg(**AI); + LLT SrcTy = MRI->getType(SrcReg); + if (SrcTy.isPointer()) + MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); + SrcRegs.push_back(SrcReg); + } + + LLT SizeTy = LLT::scalar(MinPtrSize); + + // The size operand should be the minimum of the pointer sizes. + Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; + if (MRI->getType(SizeOpReg) != SizeTy) + SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0); + + auto ICall = MIRBuilder.buildInstr(Opcode); + for (Register SrcReg : SrcRegs) + ICall.addUse(SrcReg); + Align DstAlign; Align SrcAlign; unsigned IsVol = @@ -1612,7 +1612,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(0)), MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); - if (Opcode != TargetOpcode::G_MEMSET) + if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(1)), MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); @@ -1651,16 +1651,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, return true; } -bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, - MachineIRBuilder &MIRBuilder) { - Register Dst = getOrCreateVReg(CI); - Register Src0 = getOrCreateVReg(*CI.getOperand(0)); - Register Src1 = getOrCreateVReg(*CI.getOperand(1)); - uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue(); - MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale }); - return true; -} - +bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + Register Dst = getOrCreateVReg(CI); + Register Src0 = getOrCreateVReg(*CI.getOperand(0)); + Register Src1 = getOrCreateVReg(*CI.getOperand(1)); + uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue(); + MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale }); + return true; +} + unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { switch (ID) { default: @@ -1711,14 +1711,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FNEARBYINT; case Intrinsic::pow: return TargetOpcode::G_FPOW; - case Intrinsic::powi: - return TargetOpcode::G_FPOWI; + case Intrinsic::powi: + return TargetOpcode::G_FPOWI; case Intrinsic::rint: return TargetOpcode::G_FRINT; case Intrinsic::round: return TargetOpcode::G_INTRINSIC_ROUND; - case Intrinsic::roundeven: - return TargetOpcode::G_INTRINSIC_ROUNDEVEN; + case Intrinsic::roundeven: + return TargetOpcode::G_INTRINSIC_ROUNDEVEN; case Intrinsic::sin: return TargetOpcode::G_FSIN; case Intrinsic::sqrt: @@ -1729,31 +1729,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_READCYCLECOUNTER; case Intrinsic::ptrmask: return TargetOpcode::G_PTRMASK; - case Intrinsic::lrint: - return TargetOpcode::G_INTRINSIC_LRINT; - // FADD/FMUL require checking the FMF, so are handled elsewhere. 
- case Intrinsic::vector_reduce_fmin: - return TargetOpcode::G_VECREDUCE_FMIN; - case Intrinsic::vector_reduce_fmax: - return TargetOpcode::G_VECREDUCE_FMAX; - case Intrinsic::vector_reduce_add: - return TargetOpcode::G_VECREDUCE_ADD; - case Intrinsic::vector_reduce_mul: - return TargetOpcode::G_VECREDUCE_MUL; - case Intrinsic::vector_reduce_and: - return TargetOpcode::G_VECREDUCE_AND; - case Intrinsic::vector_reduce_or: - return TargetOpcode::G_VECREDUCE_OR; - case Intrinsic::vector_reduce_xor: - return TargetOpcode::G_VECREDUCE_XOR; - case Intrinsic::vector_reduce_smax: - return TargetOpcode::G_VECREDUCE_SMAX; - case Intrinsic::vector_reduce_smin: - return TargetOpcode::G_VECREDUCE_SMIN; - case Intrinsic::vector_reduce_umax: - return TargetOpcode::G_VECREDUCE_UMAX; - case Intrinsic::vector_reduce_umin: - return TargetOpcode::G_VECREDUCE_UMIN; + case Intrinsic::lrint: + return TargetOpcode::G_INTRINSIC_LRINT; + // FADD/FMUL require checking the FMF, so are handled elsewhere. + case Intrinsic::vector_reduce_fmin: + return TargetOpcode::G_VECREDUCE_FMIN; + case Intrinsic::vector_reduce_fmax: + return TargetOpcode::G_VECREDUCE_FMAX; + case Intrinsic::vector_reduce_add: + return TargetOpcode::G_VECREDUCE_ADD; + case Intrinsic::vector_reduce_mul: + return TargetOpcode::G_VECREDUCE_MUL; + case Intrinsic::vector_reduce_and: + return TargetOpcode::G_VECREDUCE_AND; + case Intrinsic::vector_reduce_or: + return TargetOpcode::G_VECREDUCE_OR; + case Intrinsic::vector_reduce_xor: + return TargetOpcode::G_VECREDUCE_XOR; + case Intrinsic::vector_reduce_smax: + return TargetOpcode::G_VECREDUCE_SMAX; + case Intrinsic::vector_reduce_smin: + return TargetOpcode::G_VECREDUCE_SMIN; + case Intrinsic::vector_reduce_umax: + return TargetOpcode::G_VECREDUCE_UMAX; + case Intrinsic::vector_reduce_umin: + return TargetOpcode::G_VECREDUCE_UMIN; } return Intrinsic::not_intrinsic; } @@ -1846,7 +1846,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // Get the underlying objects for the location passed on the lifetime // marker. SmallVector<const Value *, 4> Allocas; - getUnderlyingObjects(CI.getArgOperand(1), Allocas); + getUnderlyingObjects(CI.getArgOperand(1), Allocas); // Iterate over each underlying object, creating lifetime markers for each // static alloca. Quit if we find a non-static alloca. @@ -1960,37 +1960,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder); case Intrinsic::ssub_sat: return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder); - case Intrinsic::ushl_sat: - return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); - case Intrinsic::sshl_sat: - return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); - case Intrinsic::umin: - return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); - case Intrinsic::umax: - return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder); - case Intrinsic::smin: - return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder); - case Intrinsic::smax: - return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder); - case Intrinsic::abs: - // TODO: Preserve "int min is poison" arg in GMIR? 
- return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder); - case Intrinsic::smul_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder); - case Intrinsic::umul_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder); - case Intrinsic::smul_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder); - case Intrinsic::umul_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder); - case Intrinsic::sdiv_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder); - case Intrinsic::udiv_fix: - return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder); - case Intrinsic::sdiv_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder); - case Intrinsic::udiv_fix_sat: - return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); + case Intrinsic::ushl_sat: + return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); + case Intrinsic::sshl_sat: + return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); + case Intrinsic::umin: + return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); + case Intrinsic::umax: + return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder); + case Intrinsic::smin: + return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder); + case Intrinsic::smax: + return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder); + case Intrinsic::abs: + // TODO: Preserve "int min is poison" arg in GMIR? + return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder); + case Intrinsic::smul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder); + case Intrinsic::umul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder); + case Intrinsic::smul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder); + case Intrinsic::umul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder); + case Intrinsic::sdiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder); + case Intrinsic::udiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder); + case Intrinsic::sdiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder); + case Intrinsic::udiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); @@ -2014,24 +2014,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } return true; } - case Intrinsic::convert_from_fp16: - // FIXME: This intrinsic should probably be removed from the IR. - MIRBuilder.buildFPExt(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; - case Intrinsic::convert_to_fp16: - // FIXME: This intrinsic should probably be removed from the IR. - MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0)), - MachineInstr::copyFlagsFromInstruction(CI)); - return true; + case Intrinsic::convert_from_fp16: + // FIXME: This intrinsic should probably be removed from the IR. 
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + case Intrinsic::convert_to_fp16: + // FIXME: This intrinsic should probably be removed from the IR. + MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; case Intrinsic::memcpy: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); case Intrinsic::memset: - return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -2114,18 +2114,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } case Intrinsic::invariant_end: return true; - case Intrinsic::expect: - case Intrinsic::annotation: - case Intrinsic::ptr_annotation: - case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: { - // Drop the intrinsic, but forward the value. - MIRBuilder.buildCopy(getOrCreateVReg(CI), - getOrCreateVReg(*CI.getArgOperand(0))); - return true; - } + case Intrinsic::expect: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: { + // Drop the intrinsic, but forward the value. + MIRBuilder.buildCopy(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0))); + return true; + } case Intrinsic::assume: - case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. @@ -2145,68 +2145,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } - case Intrinsic::localescape: { - MachineBasicBlock &EntryMBB = MF->front(); - StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); - - // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission - // is the same on all targets. - for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { - Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); - if (isa<ConstantPointerNull>(Arg)) - continue; // Skip null pointers. They represent a hole in index space. - - int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); - MCSymbol *FrameAllocSym = - MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, - Idx); - - // This should be inserted at the start of the entry block. - auto LocalEscape = - MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) - .addSym(FrameAllocSym) - .addFrameIndex(FI); - - EntryMBB.insert(EntryMBB.begin(), LocalEscape); - } - - return true; - } - case Intrinsic::vector_reduce_fadd: - case Intrinsic::vector_reduce_fmul: { - // Need to check for the reassoc flag to decide whether we want a - // sequential reduction opcode or not. 
- Register Dst = getOrCreateVReg(CI); - Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0)); - Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1)); - unsigned Opc = 0; - if (!CI.hasAllowReassoc()) { - // The sequential ordering case. - Opc = ID == Intrinsic::vector_reduce_fadd - ? TargetOpcode::G_VECREDUCE_SEQ_FADD - : TargetOpcode::G_VECREDUCE_SEQ_FMUL; - MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc}, - MachineInstr::copyFlagsFromInstruction(CI)); - return true; - } - // We split the operation into a separate G_FADD/G_FMUL + the reduce, - // since the associativity doesn't matter. - unsigned ScalarOpc; - if (ID == Intrinsic::vector_reduce_fadd) { - Opc = TargetOpcode::G_VECREDUCE_FADD; - ScalarOpc = TargetOpcode::G_FADD; - } else { - Opc = TargetOpcode::G_VECREDUCE_FMUL; - ScalarOpc = TargetOpcode::G_FMUL; - } - LLT DstTy = MRI->getType(Dst); - auto Rdx = MIRBuilder.buildInstr( - Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI)); - MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx}, - MachineInstr::copyFlagsFromInstruction(CI)); - - return true; - } + case Intrinsic::localescape: { + MachineBasicBlock &EntryMBB = MF->front(); + StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + + int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); + MCSymbol *FrameAllocSym = + MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, + Idx); + + // This should be inserted at the start of the entry block. + auto LocalEscape = + MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + + EntryMBB.insert(EntryMBB.begin(), LocalEscape); + } + + return true; + } + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + // Need to check for the reassoc flag to decide whether we want a + // sequential reduction opcode or not. + Register Dst = getOrCreateVReg(CI); + Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0)); + Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1)); + unsigned Opc = 0; + if (!CI.hasAllowReassoc()) { + // The sequential ordering case. + Opc = ID == Intrinsic::vector_reduce_fadd + ? TargetOpcode::G_VECREDUCE_SEQ_FADD + : TargetOpcode::G_VECREDUCE_SEQ_FMUL; + MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc}, + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } + // We split the operation into a separate G_FADD/G_FMUL + the reduce, + // since the associativity doesn't matter. 
+ unsigned ScalarOpc; + if (ID == Intrinsic::vector_reduce_fadd) { + Opc = TargetOpcode::G_VECREDUCE_FADD; + ScalarOpc = TargetOpcode::G_FADD; + } else { + Opc = TargetOpcode::G_VECREDUCE_FMUL; + ScalarOpc = TargetOpcode::G_FMUL; + } + LLT DstTy = MRI->getType(Dst); + auto Rdx = MIRBuilder.buildInstr( + Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx}, + MachineInstr::copyFlagsFromInstruction(CI)); + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -2328,11 +2328,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } else { MIB.addFPImm(cast<ConstantFP>(Arg.value())); } - } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { - auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); - if (!MDN) // This was probably an MDString. - return false; - MIB.addMetadata(MDN); + } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { + auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); + if (!MDN) // This was probably an MDString. + return false; + MIB.addMetadata(MDN); } else { ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); if (VRegs.size() > 1) @@ -2357,62 +2357,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return true; } -bool IRTranslator::findUnwindDestinations( - const BasicBlock *EHPadBB, - BranchProbability Prob, - SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> - &UnwindDests) { - EHPersonality Personality = classifyEHPersonality( - EHPadBB->getParent()->getFunction().getPersonalityFn()); - bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; - bool IsCoreCLR = Personality == EHPersonality::CoreCLR; - bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; - bool IsSEH = isAsynchronousEHPersonality(Personality); - - if (IsWasmCXX) { - // Ignore this for now. - return false; - } - - while (EHPadBB) { - const Instruction *Pad = EHPadBB->getFirstNonPHI(); - BasicBlock *NewEHPadBB = nullptr; - if (isa<LandingPadInst>(Pad)) { - // Stop on landingpads. They are not funclets. - UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); - break; - } - if (isa<CleanupPadInst>(Pad)) { - // Stop on cleanup pads. Cleanups are always funclet entries for all known - // personalities. - UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); - UnwindDests.back().first->setIsEHScopeEntry(); - UnwindDests.back().first->setIsEHFuncletEntry(); - break; - } - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { - // Add the catchpad handlers to the possible destinations. - for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { - UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob); - // For MSVC++ and the CLR, catchblocks are funclets and need prologues. 
- if (IsMSVCCXX || IsCoreCLR) - UnwindDests.back().first->setIsEHFuncletEntry(); - if (!IsSEH) - UnwindDests.back().first->setIsEHScopeEntry(); - } - NewEHPadBB = CatchSwitch->getUnwindDest(); - } else { - continue; - } - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (BPI && NewEHPadBB) - Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); - EHPadBB = NewEHPadBB; - } - return true; -} - +bool IRTranslator::findUnwindDestinations( + const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = classifyEHPersonality( + EHPadBB->getParent()->getFunction().getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; + bool IsSEH = isAsynchronousEHPersonality(Personality); + + if (IsWasmCXX) { + // Ignore this for now. + return false; + } + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + break; + } + if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + UnwindDests.back().first->setIsEHScopeEntry(); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } + return true; +} + bool IRTranslator::translateInvoke(const User &U, MachineIRBuilder &MIRBuilder) { const InvokeInst &I = cast<InvokeInst>(U); @@ -2438,7 +2438,7 @@ bool IRTranslator::translateInvoke(const User &U, return false; // FIXME: support Windows exception handling. - if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) + if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) return false; // Emit the actual call, bracketed by EH_LABELs so that the MF knows about @@ -2452,26 +2452,26 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *EndSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); - SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; - BranchProbabilityInfo *BPI = FuncInfo.BPI; - MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(); - BranchProbability EHPadBBProb = - BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) - : BranchProbability::getZero(); - - if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests)) - return false; - + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(); + BranchProbability EHPadBBProb = + BPI ? 
BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + + if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests)) + return false; + MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB), &ReturnMBB = getMBB(*ReturnBB); - // Update successor info. - addSuccessorWithProb(InvokeMBB, &ReturnMBB); - for (auto &UnwindDest : UnwindDests) { - UnwindDest.first->setIsEHPad(); - addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); - } - InvokeMBB->normalizeSuccProbs(); - + // Update successor info. + addSuccessorWithProb(InvokeMBB, &ReturnMBB); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); MIRBuilder.buildBr(ReturnMBB); return true; @@ -2511,12 +2511,12 @@ bool IRTranslator::translateLandingPad(const User &U, MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) .addSym(MF->addLandingPad(&MBB)); - // If the unwinder does not preserve all registers, ensure that the - // function marks the clobbered registers as used. - const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); - if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) - MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); - + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + LLT Ty = getLLTForType(*LP.getType(), *DL); Register Undef = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildUndef(Undef); @@ -2855,8 +2855,8 @@ bool IRTranslator::translate(const Instruction &Inst) { // We only emit constants into the entry block from here. To prevent jumpy // debug behaviour set the line to 0. if (const DebugLoc &DL = Inst.getDebugLoc()) - EntryBuilder->setDebugLoc(DILocation::get( - Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); + EntryBuilder->setDebugLoc(DILocation::get( + Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); else EntryBuilder->setDebugLoc(DebugLoc()); @@ -2934,57 +2934,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } void IRTranslator::finalizeBasicBlock() { - for (auto &BTB : SL->BitTestCases) { - // Emit header first, if it wasn't already emitted. - if (!BTB.Emitted) - emitBitTestHeader(BTB, BTB.Parent); - - BranchProbability UnhandledProb = BTB.Prob; - for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { - UnhandledProb -= BTB.Cases[j].ExtraProb; - // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *MBB = BTB.Cases[j].ThisBB; - // If all cases cover a contiguous range, it is not necessary to jump to - // the default block after the last bit test fails. This is because the - // range check during bit test header creation has guaranteed that every - // case here doesn't go outside the range. In this case, there is no need - // to perform the last bit test, as it will always be true. Instead, make - // the second-to-last bit-test fall through to the target of the last bit - // test, and delete the last bit test. - - MachineBasicBlock *NextMBB; - if (BTB.ContiguousRange && j + 2 == ej) { - // Second-to-last bit-test with contiguous range: fall through to the - // target of the final bit test. 
- NextMBB = BTB.Cases[j + 1].TargetBB; - } else if (j + 1 == ej) { - // For the last bit test, fall through to Default. - NextMBB = BTB.Default; - } else { - // Otherwise, fall through to the next bit test. - NextMBB = BTB.Cases[j + 1].ThisBB; - } - - emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); - - // FIXME delete this block below? - if (BTB.ContiguousRange && j + 2 == ej) { - // Since we're not going to use the final bit test, remove it. - BTB.Cases.pop_back(); - break; - } - } - // This is "default" BB. We have two jumps to it. From "header" BB and from - // last "case" BB, unless the latter was skipped. - CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(), - BTB.Default->getBasicBlock()}; - addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent); - if (!BTB.ContiguousRange) { - addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB); - } - } - SL->BitTestCases.clear(); - + for (auto &BTB : SL->BitTestCases) { + // Emit header first, if it wasn't already emitted. + if (!BTB.Emitted) + emitBitTestHeader(BTB, BTB.Parent); + + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; + // Set the current basic block to the mbb we wish to insert the code into + MachineBasicBlock *MBB = BTB.Cases[j].ThisBB; + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + + MachineBasicBlock *NextMBB; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. + NextMBB = BTB.Cases[j + 1].ThisBB; + } + + emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); + + // FIXME delete this block below? + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); + break; + } + } + // This is "default" BB. We have two jumps to it. From "header" BB and from + // last "case" BB, unless the latter was skipped. + CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(), + BTB.Default->getBasicBlock()}; + addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent); + if (!BTB.ContiguousRange) { + addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB); + } + } + SL->BitTestCases.clear(); + for (auto &JTCase : SL->JTCases) { // Emit header first, if it wasn't already emitted. 
if (!JTCase.first.Emitted) @@ -2993,10 +2993,10 @@ void IRTranslator::finalizeBasicBlock() { emitJumpTable(JTCase.second, JTCase.second.MBB); } SL->JTCases.clear(); - - for (auto &SwCase : SL->SwitchCases) - emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); - SL->SwitchCases.clear(); + + for (auto &SwCase : SL->SwitchCases) + emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); + SL->SwitchCases.clear(); } void IRTranslator::finalizeFunction() { @@ -3058,24 +3058,24 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); ORE = std::make_unique<OptimizationRemarkEmitter>(&F); - const TargetMachine &TM = MF->getTarget(); - TM.resetTargetOptions(F); - EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); + const TargetMachine &TM = MF->getTarget(); + TM.resetTargetOptions(F); + EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - if (EnableOpts) - FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); - else - FuncInfo.BPI = nullptr; - - FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); - + if (EnableOpts) + FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + else + FuncInfo.BPI = nullptr; + + FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); - + SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); - + assert(PendingPHIs.empty() && "stale PHIs"); if (!DL->isLittleEndian()) { @@ -3142,7 +3142,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { + if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 8bdf9f8862..bb4d41cfd6 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -562,11 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm( } unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); - if (OpInfo.Regs.front().isVirtual()) { - // Put the register class of the virtual registers in the flag word. - const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); - Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); - } + if (OpInfo.Regs.front().isVirtual()) { + // Put the register class of the virtual registers in the flag word. + const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } Inst.addImm(Flag); if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) return false; @@ -662,7 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint( default: return false; case 'i': // Simple Integer or Relocatable Constant - case 'n': // immediate integer with a known value. + case 'n': // immediate integer with a known value. 
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { assert(CI->getBitWidth() <= 64 && "expected immediate to fit into 64-bits"); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp index bbd09edaf1..25fae54871 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -41,7 +41,7 @@ static cl::opt<std::string> cl::desc("Record GlobalISel rule coverage files of this " "prefix if instrumentation was generated")); #else -static const std::string CoveragePrefix; +static const std::string CoveragePrefix; #endif char InstructionSelect::ID = 0; diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 1f39b5bf2c..4fec9e628d 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -38,7 +38,7 @@ bool InstructionSelector::isOperandImmEqual( const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) - return VRegVal->Value.getSExtValue() == Value; + return VRegVal->Value.getSExtValue() == Value; return false; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 5d2979e053..1993f60332 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// -// Enable optimizations to work around MSVC debug mode bug in 32-bit: -// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html -// FIXME: Remove this when the issue is closed. -#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) -// We have to disable runtime checks in order to enable optimizations. This is -// done for the entire file because the problem is actually observed in STL -// template functions. -#pragma runtime_checks("", off) -#pragma optimize("gs", on) -#endif - +// Enable optimizations to work around MSVC debug mode bug in 32-bit: +// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html +// FIXME: Remove this when the issue is closed. +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) +// We have to disable runtime checks in order to enable optimizations. This is +// done for the entire file because the problem is actually observed in STL +// template functions. 
+#pragma runtime_checks("", off) +#pragma optimize("gs", on) +#endif + #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" using namespace llvm; @@ -35,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx, std::initializer_list<LLT> TypesInit) { SmallVector<LLT, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { - return llvm::is_contained(Types, Query.Types[TypeIdx]); + return llvm::is_contained(Types, Query.Types[TypeIdx]); }; } @@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet( SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]}; - return llvm::is_contained(Types, Match); + return llvm::is_contained(Types, Match); }; } @@ -57,10 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet( TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.MMODescrs[MMOIdx].SizeInBits, Query.MMODescrs[MMOIdx].AlignInBits}; - return llvm::any_of(TypesAndMemDesc, - [=](const TypePairAndMemDesc &Entry) -> bool { - return Match.isCompatible(Entry); - }); + return llvm::any_of(TypesAndMemDesc, + [=](const TypePairAndMemDesc &Entry) -> bool { + return Match.isCompatible(Entry); + }); }; } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index a5169a9239..f3ba3f0801 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -43,16 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, }; } -LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, - unsigned FromTypeIdx) { - return [=](const LegalityQuery &Query) { - const LLT OldTy = Query.Types[TypeIdx]; - const LLT NewTy = Query.Types[FromTypeIdx]; - const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); - return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); - }; -} - +LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); + return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); + }; +} + LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp index c0629d955d..5ba9367cac 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, WrapperObserver)) { WorkListObserver.printNewInstrs(); for (auto *DeadMI : DeadInstructions) { - LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); + LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 995abb85d0..66871ca3b9 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ 
b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -16,7 +16,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -30,7 +30,7 @@ using namespace llvm; using namespace LegalizeActions; -using namespace MIPatternMatch; +using namespace MIPatternMatch; /// Try to break down \p OrigTy into \p NarrowTy sized pieces. /// @@ -77,8 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { return Type::getFloatTy(Ctx); case 64: return Type::getDoubleTy(Ctx); - case 80: - return Type::getX86_FP80Ty(Ctx); + case 80: + return Type::getX86_FP80Ty(Ctx); case 128: return Type::getFP128Ty(Ctx); default: @@ -90,15 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), - LI(*MF.getSubtarget().getLegalizerInfo()), - TLI(*MF.getSubtarget().getTargetLowering()) { } + LI(*MF.getSubtarget().getLegalizerInfo()), + TLI(*MF.getSubtarget().getTargetLowering()) { } LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) - : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), - TLI(*MF.getSubtarget().getTargetLowering()) { } - + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), + TLI(*MF.getSubtarget().getTargetLowering()) { } + LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); @@ -240,20 +240,20 @@ void LegalizerHelper::insertParts(Register DstReg, } } -/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. +/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
static void getUnmergeResults(SmallVectorImpl<Register> &Regs, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); - const int StartIdx = Regs.size(); + const int StartIdx = Regs.size(); const int NumResults = MI.getNumOperands() - 1; - Regs.resize(Regs.size() + NumResults); + Regs.resize(Regs.size() + NumResults); for (int I = 0; I != NumResults; ++I) - Regs[StartIdx + I] = MI.getOperand(I).getReg(); + Regs[StartIdx + I] = MI.getOperand(I).getReg(); } -void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, - LLT GCDTy, Register SrcReg) { +void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, + LLT GCDTy, Register SrcReg) { LLT SrcTy = MRI.getType(SrcReg); if (SrcTy == GCDTy) { // If the source already evenly divides the result type, we don't need to do @@ -264,13 +264,13 @@ void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); getUnmergeResults(Parts, *Unmerge); } -} +} -LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, - LLT NarrowTy, Register SrcReg) { - LLT SrcTy = MRI.getType(SrcReg); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - extractGCDType(Parts, GCDTy, SrcReg); +LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, + LLT NarrowTy, Register SrcReg) { + LLT SrcTy = MRI.getType(SrcReg); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + extractGCDType(Parts, GCDTy, SrcReg); return GCDTy; } @@ -384,14 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } if (LCMTy.isVector()) { - unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); - SmallVector<Register, 8> UnmergeDefs(NumDefs); - UnmergeDefs[0] = DstReg; - for (unsigned I = 1; I != NumDefs; ++I) - UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); - - MIRBuilder.buildUnmerge(UnmergeDefs, - MIRBuilder.buildMerge(LCMTy, RemergeRegs)); + unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); + SmallVector<Register, 8> UnmergeDefs(NumDefs); + UnmergeDefs[0] = DstReg; + for (unsigned I = 1; I != NumDefs; ++I) + UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); + + MIRBuilder.buildUnmerge(UnmergeDefs, + MIRBuilder.buildMerge(LCMTy, RemergeRegs)); return; } @@ -399,20 +399,20 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { -#define RTLIBCASE_INT(LibcallPrefix) \ - do { \ - switch (Size) { \ - case 32: \ - return RTLIB::LibcallPrefix##32; \ - case 64: \ - return RTLIB::LibcallPrefix##64; \ - case 128: \ - return RTLIB::LibcallPrefix##128; \ - default: \ - llvm_unreachable("unexpected size"); \ - } \ - } while (0) - +#define RTLIBCASE_INT(LibcallPrefix) \ + do { \ + switch (Size) { \ + case 32: \ + return RTLIB::LibcallPrefix##32; \ + case 64: \ + return RTLIB::LibcallPrefix##64; \ + case 128: \ + return RTLIB::LibcallPrefix##128; \ + default: \ + llvm_unreachable("unexpected size"); \ + } \ + } while (0) + #define RTLIBCASE(LibcallPrefix) \ do { \ switch (Size) { \ @@ -420,8 +420,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { return RTLIB::LibcallPrefix##32; \ case 64: \ return RTLIB::LibcallPrefix##64; \ - case 80: \ - return RTLIB::LibcallPrefix##80; \ + case 80: \ + return RTLIB::LibcallPrefix##80; \ case 128: \ return RTLIB::LibcallPrefix##128; \ default: \ @@ -431,15 +431,15 @@ static RTLIB::Libcall 
getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - RTLIBCASE_INT(SDIV_I); + RTLIBCASE_INT(SDIV_I); case TargetOpcode::G_UDIV: - RTLIBCASE_INT(UDIV_I); + RTLIBCASE_INT(UDIV_I); case TargetOpcode::G_SREM: - RTLIBCASE_INT(SREM_I); + RTLIBCASE_INT(SREM_I); case TargetOpcode::G_UREM: - RTLIBCASE_INT(UREM_I); + RTLIBCASE_INT(UREM_I); case TargetOpcode::G_CTLZ_ZERO_UNDEF: - RTLIBCASE_INT(CTLZ_I); + RTLIBCASE_INT(CTLZ_I); case TargetOpcode::G_FADD: RTLIBCASE(ADD_F); case TargetOpcode::G_FSUB: @@ -482,16 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(RINT_F); case TargetOpcode::G_FNEARBYINT: RTLIBCASE(NEARBYINT_F); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - RTLIBCASE(ROUNDEVEN_F); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + RTLIBCASE(ROUNDEVEN_F); } llvm_unreachable("Unknown libcall function"); } /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(const TargetInstrInfo &TII, - MachineInstr &MI) { +static bool isLibCallInTailPosition(const TargetInstrInfo &TII, + MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -566,7 +566,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, SmallVector<CallLowering::ArgInfo, 3> Args; // Add all the args, except for the last which is an imm denoting 'tail'. - for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { + for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { Register Reg = MI.getOperand(i).getReg(); // Need derive an IR type for call lowering. @@ -582,14 +582,14 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); RTLIB::Libcall RTLibcall; - switch (MI.getOpcode()) { - case TargetOpcode::G_MEMCPY: + switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; break; - case TargetOpcode::G_MEMMOVE: - RTLibcall = RTLIB::MEMMOVE; - break; - case TargetOpcode::G_MEMSET: + case TargetOpcode::G_MEMMOVE: + RTLibcall = RTLIB::MEMMOVE; + break; + case TargetOpcode::G_MEMSET: RTLibcall = RTLIB::MEMSET; break; default: @@ -601,8 +601,8 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MIRBuilder.getTII(), MI); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(MIRBuilder.getTII(), MI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -695,11 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FRINT: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); - if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { - LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && 
Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); @@ -748,14 +748,14 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); - MI.eraseFromParent(); - return Result; + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); + MI.eraseFromParent(); + return Result; + } } - } MI.eraseFromParent(); return Legalized; @@ -935,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - auto &MMO = **MI.memoperands_begin(); + auto &MMO = **MI.memoperands_begin(); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -959,15 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); - unsigned MemSize = MMO.getSizeInBits(); - - if (MemSize == NarrowSize) { + unsigned MemSize = MMO.getSizeInBits(); + + if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - } else if (MemSize < NarrowSize) { + } else if (MemSize < NarrowSize) { MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); - } else if (MemSize > NarrowSize) { - // FIXME: Need to split the load. - return UnableToLegalize; + } else if (MemSize > NarrowSize) { + // FIXME: Need to split the load. + return UnableToLegalize; } if (ZExt) @@ -1063,11 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. 
+ if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs(NumParts); SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); @@ -1248,7 +1248,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRMASK: { if (TypeIdx != 1) return UnableToLegalize; @@ -1257,17 +1257,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: - case TargetOpcode::G_FPTOSI: - return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); - case TargetOpcode::G_FPEXT: - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FPEXT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; + } } Register LegalizerHelper::coerceToScalar(Register Val) { @@ -1328,7 +1328,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, @@ -1496,40 +1496,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } -Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { - Register WideReg = MRI.createGenericVirtualRegister(WideTy); - LLT OrigTy = MRI.getType(OrigReg); - LLT LCMTy = getLCMType(WideTy, OrigTy); - - const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); - const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); - - Register UnmergeSrc = WideReg; - - // Create a merge to the LCM type, padding with undef - // %0:_(<3 x s32>) = G_FOO => <4 x s32> - // => - // %1:_(<4 x s32>) = G_FOO - // %2:_(<4 x s32>) = G_IMPLICIT_DEF - // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 - // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 - if (NumMergeParts > 1) { - Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); - SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); - MergeParts[0] = WideReg; - UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); - } - - // Unmerge to the original register and pad with dead defs. 
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); - UnmergeResults[0] = OrigReg; - for (int I = 1; I != NumUnmergeParts; ++I) - UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); - - MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); - return WideReg; -} - +Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { + Register WideReg = MRI.createGenericVirtualRegister(WideTy); + LLT OrigTy = MRI.getType(OrigReg); + LLT LCMTy = getLCMType(WideTy, OrigTy); + + const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); + const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); + + Register UnmergeSrc = WideReg; + + // Create a merge to the LCM type, padding with undef + // %0:_(<3 x s32>) = G_FOO => <4 x s32> + // => + // %1:_(<4 x s32>) = G_FOO + // %2:_(<4 x s32>) = G_IMPLICIT_DEF + // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 + // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 + if (NumMergeParts > 1) { + Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); + SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); + MergeParts[0] = WideReg; + UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); + } + + // Unmerge to the original register and pad with dead defs. + SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); + UnmergeResults[0] = OrigReg; + for (int I = 1; I != NumUnmergeParts; ++I) + UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); + + MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); + return WideReg; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1599,60 +1599,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); - // Create a sequence of unmerges and merges to the original results. Since we - // may have widened the source, we will need to pad the results with dead defs - // to cover the source register. - // e.g. widen s48 to s64: - // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) + // Create a sequence of unmerges and merges to the original results. Since we + // may have widened the source, we will need to pad the results with dead defs + // to cover the source register. + // e.g. 
widen s48 to s64: + // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) // // => - // %4:_(s192) = G_ANYEXT %0:_(s96) - // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge - // ; unpack to GCD type, with extra dead defs - // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) - // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) - // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) - // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination - // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination - const LLT GCDTy = getGCDType(WideTy, DstTy); + // %4:_(s192) = G_ANYEXT %0:_(s96) + // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge + // ; unpack to GCD type, with extra dead defs + // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) + // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) + // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) + // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination + // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination + const LLT GCDTy = getGCDType(WideTy, DstTy); const int NumUnmerge = Unmerge->getNumOperands() - 1; - const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); - - // Directly unmerge to the destination without going through a GCD type - // if possible - if (PartsPerRemerge == 1) { - const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); - - for (int I = 0; I != NumUnmerge; ++I) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - - for (int J = 0; J != PartsPerUnmerge; ++J) { - int Idx = I * PartsPerUnmerge + J; - if (Idx < NumDst) - MIB.addDef(MI.getOperand(Idx).getReg()); - else { - // Create dead def for excess components. - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); - } + const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); + + // Directly unmerge to the destination without going through a GCD type + // if possible + if (PartsPerRemerge == 1) { + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. 
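As a rough illustration of the repartitioning arithmetic in widenScalarUnmergeValues, here is a standalone sketch that only computes the GCD piece size and the piece counts; the s48/s64 widths are an assumption matching the "widen s48 to s64" example in the comment above.

#include <cassert>
#include <numeric> // std::gcd (C++17)

int main() {
  const unsigned DstBits = 48;  // original unmerge result type (s48)
  const unsigned WideBits = 64; // requested widened type (s64)

  const unsigned GCDBits = std::gcd(DstBits, WideBits);  // 16: unpack into s16 pieces
  const unsigned PartsPerRemerge = DstBits / GCDBits;    // 3 x s16 remerge into one s48
  const unsigned PartsPerUnmerge = WideBits / GCDBits;   // one s64 unmerges into 4 x s16

  assert(GCDBits == 16 && PartsPerRemerge == 3 && PartsPerUnmerge == 4);
  return 0;
}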
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } } - - MIB.addUse(Unmerge.getReg(I)); + + MIB.addUse(Unmerge.getReg(I)); + } + } else { + SmallVector<Register, 16> Parts; + for (int J = 0; J != NumUnmerge; ++J) + extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); + + SmallVector<Register, 8> RemergeParts; + for (int I = 0; I != NumDst; ++I) { + for (int J = 0; J < PartsPerRemerge; ++J) { + const int Idx = I * PartsPerRemerge + J; + RemergeParts.emplace_back(Parts[Idx]); + } + + MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + RemergeParts.clear(); } - } else { - SmallVector<Register, 16> Parts; - for (int J = 0; J != NumUnmerge; ++J) - extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); - - SmallVector<Register, 8> RemergeParts; - for (int I = 0; I != NumDst; ++I) { - for (int J = 0; J < PartsPerRemerge; ++J) { - const int Idx = I * PartsPerRemerge + J; - RemergeParts.emplace_back(Parts[Idx]); - } - - MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); - RemergeParts.clear(); - } } MI.eraseFromParent(); @@ -1702,7 +1702,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { Src = MIRBuilder.buildAnyExt(WideTy, Src); ShiftTy = WideTy; - } + } auto LShr = MIRBuilder.buildLShr( ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); @@ -1740,7 +1740,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx != 0 || WideTy.isVector()) + if (TypeIdx != 0 || WideTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); @@ -1750,45 +1750,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - unsigned Op = MI.getOpcode(); - unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO - ? TargetOpcode::G_ADD - : TargetOpcode::G_SUB; - unsigned ExtOpcode = - Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO - ? TargetOpcode::G_ZEXT - : TargetOpcode::G_SEXT; - auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); - auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); - // Do the arithmetic in the larger type. - auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); - LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); - auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); - auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); - // There is no overflow if the ExtOp is the same as NewOp. - MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); - // Now trunc the NewOp to the original result. - MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 1) + return UnableToLegalize; // TODO + unsigned Op = MI.getOpcode(); + unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO + ? 
TargetOpcode::G_ADD + : TargetOpcode::G_SUB; + unsigned ExtOpcode = + Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO + ? TargetOpcode::G_ZEXT + : TargetOpcode::G_SEXT; + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); + auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); + // Do the arithmetic in the larger type. + auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); + LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); + auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); + auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); + // There is no overflow if the ExtOp is the same as NewOp. + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); + // Now trunc the NewOp to the original result. + MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || - MI.getOpcode() == TargetOpcode::G_SSUBSAT || - MI.getOpcode() == TargetOpcode::G_SSHLSAT; - bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT; + MI.getOpcode() == TargetOpcode::G_SSUBSAT || + MI.getOpcode() == TargetOpcode::G_SSHLSAT; + bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT; // We can convert this to: // 1. Any extend iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB|SHL]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // // It may be more efficient to lower this to a min and a max operation in @@ -1799,14 +1799,14 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, unsigned NewBits = WideTy.getScalarSizeInBits(); unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); - // Shifts must zero-extend the RHS to preserve the unsigned quantity, and - // must not left shift the RHS to preserve the shift amount. + // Shifts must zero-extend the RHS to preserve the unsigned quantity, and + // must not left shift the RHS to preserve the shift amount. auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); - auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) - : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) + : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); - auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); + auto ShiftR = IsShift ? 
RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {ShiftL, ShiftR}, MI.getFlags()); @@ -1834,18 +1834,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return widenScalarMergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UNMERGE_VALUES: return widenScalarUnmergeValues(MI, TypeIdx, WideTy); - case TargetOpcode::G_SADDO: - case TargetOpcode::G_SSUBO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: case TargetOpcode::G_UADDO: - case TargetOpcode::G_USUBO: - return widenScalarAddoSubo(MI, TypeIdx, WideTy); + case TargetOpcode::G_USUBO: + return widenScalarAddoSubo(MI, TypeIdx, WideTy); case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: - case TargetOpcode::G_SSHLSAT: + case TargetOpcode::G_SSHLSAT: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: - case TargetOpcode::G_USHLSAT: - return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); + case TargetOpcode::G_USHLSAT: + return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -2038,22 +2038,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_SITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_UITOFP: Observer.changingInstr(MI); - - if (TypeIdx == 0) - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - else - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); - + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_LOAD: @@ -2069,7 +2069,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!Ty.isScalar()) + if (!Ty.isScalar()) return UnableToLegalize; Observer.changingInstr(MI); @@ -2267,7 +2267,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -2277,15 +2277,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_FPOWI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPOWI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; @@ -2312,7 +2312,7 @@ 
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Avoid changing the result vector type if the source element type was // requested. if (TypeIdx == 1) { - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); } else { widenScalarDst(MI, WideTy, 0); } @@ -2415,377 +2415,377 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { return UnableToLegalize; } -/// Figure out the bit offset into a register when coercing a vector index for -/// the wide element type. This is only for the case when promoting vector to -/// one with larger elements. -// -/// -/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) -/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) -static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, - Register Idx, - unsigned NewEltSize, - unsigned OldEltSize) { - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - LLT IdxTy = B.getMRI()->getType(Idx); - - // Now figure out the amount we need to shift to get the target bits. - auto OffsetMask = B.buildConstant( - IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); - auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); - return B.buildShl(IdxTy, OffsetIdx, - B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); -} - -/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this -/// is casting to a vector with a smaller element size, perform multiple element -/// extracts and merge the results. If this is coercing to a vector with larger -/// elements, index the bitcasted vector and extract the target element with bit -/// operations. This is intended to force the indexing in the native register -/// size for architectures that can dynamically index the register file. +/// Figure out the bit offset into a register when coercing a vector index for +/// the wide element type. This is only for the case when promoting vector to +/// one with larger elements. +// +/// +/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) +/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) +static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, + Register Idx, + unsigned NewEltSize, + unsigned OldEltSize) { + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + LLT IdxTy = B.getMRI()->getType(Idx); + + // Now figure out the amount we need to shift to get the target bits. + auto OffsetMask = B.buildConstant( + IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); + return B.buildShl(IdxTy, OffsetIdx, + B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); +} + +/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this +/// is casting to a vector with a smaller element size, perform multiple element +/// extracts and merge the results. If this is coercing to a vector with larger +/// elements, index the bitcasted vector and extract the target element with bit +/// operations. This is intended to force the indexing in the native register +/// size for architectures that can dynamically index the register file. 
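The two-line offset formula in the comment above can be exercised on plain integers. A minimal sketch, assuming power-of-two element sizes and a 32-bit wide element holding packed 8-bit source elements (so DstEltSize / SrcEltSize is 4); these concrete sizes are illustrative assumptions.

#include <cassert>

// Integer log2, assuming a power-of-two input.
static unsigned log2u(unsigned V) {
  unsigned L = 0;
  while (V > 1) { V >>= 1; ++L; }
  return L;
}

// %offset_idx  = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static unsigned bitOffsetInWideElt(unsigned Idx, unsigned NewEltSize,
                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = log2u(NewEltSize / OldEltSize);
  const unsigned OffsetIdx = Idx & ~(~0u << Log2EltRatio); // index within one wide element
  return OffsetIdx << log2u(OldEltSize);                   // scaled to a bit offset
}

int main() {
  // 8-bit source elements viewed through 32-bit wide elements: element 5 sits
  // in wide element 1 at bit offset (5 % 4) * 8 == 8.
  assert(bitOffsetInWideElt(5, 32, 8) == 8);
  assert(bitOffsetInWideElt(7, 32, 8) == 24);
  return 0;
}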
+LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT SrcVecTy = MRI.getType(SrcVec); + LLT IdxTy = MRI.getType(Idx); + + LLT SrcEltTy = SrcVecTy.getElementType(); + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = SrcVecTy.getNumElements(); + + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = SrcEltTy.getSizeInBits(); + if (NewNumElts > OldNumElts) { + // Decreasing the vector element size + // + // e.g. i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + if (NewNumElts % OldNumElts != 0) + return UnableToLegalize; + + // Type of the intermediate result vector. + const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; + LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + + auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); + + SmallVector<Register, 8> NewOps(NewEltsPerOldElt); + auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); + + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); + auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); + auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); + NewOps[I] = Elt.getReg(0); + } + + auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); + MIRBuilder.buildBitcast(Dst, NewVec); + MI.eraseFromParent(); + return Legalized; + } + + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + // Increasing the vector element size. + // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx + // + // => + // + // %cast = G_BITCAST %vec + // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) + // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx + // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) + // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) + // %elt_bits = G_LSHR %wide_elt, %offset_bits + // %elt = G_TRUNC %elt_bits + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. + auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register WideElt = CastVec; + if (CastTy.isVector()) { + WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + // Shift the wide element to get the target element. 
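To make the "increasing the vector element size" path above concrete, here is a hedged standalone sketch doing the same scaled-index / shift / truncate sequence on an array of 32-bit words viewed as packed 8-bit elements. It assumes element 0 lives in the low bits of each wide lane, which is what the shift formula above implies; the sizes are illustrative only.

#include <cassert>
#include <cstdint>

// Extract the Idx-th 8-bit element from a vector stored as 32-bit words:
//   scaled_idx  = idx >> Log2(32/8)          (which wide lane)
//   offset_bits = (idx & 3) * 8              (where inside that lane)
//   elt         = trunc(lane >> offset_bits)
static uint8_t extractNarrowElt(const uint32_t *Words, unsigned Idx) {
  const unsigned ScaledIdx = Idx >> 2;
  const unsigned OffsetBits = (Idx & 3u) * 8u;
  return static_cast<uint8_t>(Words[ScaledIdx] >> OffsetBits);
}

int main() {
  // Bytes 0x00..0x77 packed low-to-high into two 32-bit lanes.
  const uint32_t Words[2] = {0x33221100u, 0x77665544u};
  assert(extractNarrowElt(Words, 0) == 0x00);
  assert(extractNarrowElt(Words, 3) == 0x33);
  assert(extractNarrowElt(Words, 6) == 0x66);
  return 0;
}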
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); + MIRBuilder.buildTrunc(Dst, ExtractedBits); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p +/// TargetReg, while preserving other bits in \p TargetReg. +/// +/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) +static Register buildBitFieldInsert(MachineIRBuilder &B, + Register TargetReg, Register InsertReg, + Register OffsetBits) { + LLT TargetTy = B.getMRI()->getType(TargetReg); + LLT InsertTy = B.getMRI()->getType(InsertReg); + auto ZextVal = B.buildZExt(TargetTy, InsertReg); + auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); + + // Produce a bitmask of the value to insert + auto EltMask = B.buildConstant( + TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), + InsertTy.getSizeInBits())); + // Shift it into position + auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); + auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); + + // Clear out the bits in the wide element + auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); + + // The value to insert has all zeros already, so stick it into the masked + // wide element. + return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); +} + +/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this +/// is increasing the element size, perform the indexing in the target element +/// type, and use bit operations to insert at the element position. This is +/// intended for architectures that can dynamically index the register file and +/// want to force indexing in the native register size. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Val = MI.getOperand(2).getReg(); + Register Idx = MI.getOperand(3).getReg(); + + LLT VecTy = MRI.getType(Dst); + LLT IdxTy = MRI.getType(Idx); + + LLT VecEltTy = VecTy.getElementType(); + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = VecEltTy.getSizeInBits(); + + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = VecTy.getNumElements(); + + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. + auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register ExtractedElt = CastVec; + if (CastTy.isVector()) { + ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. 
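The buildBitFieldInsert helper above follows the usual mask-and-or recipe. A minimal sketch of the same formula on plain integers; the 32-bit target and 8-bit insert widths are assumptions chosen for the example.

#include <cassert>
#include <cstdint>

// (zext(Insert) << Offset) | (Target & ~(lowBitsMask(InsertBits) << Offset))
static uint32_t bitFieldInsert(uint32_t Target, uint8_t Insert,
                               unsigned OffsetBits) {
  const uint32_t EltMask = 0xFFu;                     // mask covering the inserted value
  const uint32_t ShiftedMask = EltMask << OffsetBits; // mask moved into position
  const uint32_t Cleared = Target & ~ShiftedMask;     // clear the old bits
  return Cleared | (uint32_t(Insert) << OffsetBits);  // drop in the new value
}

int main() {
  assert(bitFieldInsert(0xAABBCCDDu, 0x11, 8) == 0xAABB11DDu);
  assert(bitFieldInsert(0xAABBCCDDu, 0x22, 24) == 0x22BBCCDDu);
  return 0;
}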
+ Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, + Val, OffsetBits); + if (CastTy.isVector()) { + InsertedElt = MIRBuilder.buildInsertVectorElement( + CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); + } + + MIRBuilder.buildBitcast(Dst, InsertedElt); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerLoad(MachineInstr &MI) { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case 
TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStore(MachineInstr &MI) { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the PtrAdd and truncating stores. + LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 1) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Idx = MI.getOperand(2).getReg(); - LLT SrcVecTy = MRI.getType(SrcVec); - LLT IdxTy = MRI.getType(Idx); - - LLT SrcEltTy = SrcVecTy.getElementType(); - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = SrcVecTy.getNumElements(); - - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = SrcEltTy.getSizeInBits(); - if (NewNumElts > OldNumElts) { - // Decreasing the vector element size - // - // e.g. 
i64 = extract_vector_elt x:v2i64, y:i32 - // => - // v4i32:castx = bitcast x:v2i64 - // - // i64 = bitcast - // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), - // (i32 (extract_vector_elt castx, (2 * y + 1))) - // - if (NewNumElts % OldNumElts != 0) - return UnableToLegalize; - - // Type of the intermediate result vector. - const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; - LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); - - auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); - - SmallVector<Register, 8> NewOps(NewEltsPerOldElt); - auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); - - for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { - auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); - auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); - auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); - NewOps[I] = Elt.getReg(0); - } - - auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); - MIRBuilder.buildBitcast(Dst, NewVec); - MI.eraseFromParent(); - return Legalized; - } - - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - // Increasing the vector element size. - // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx - // - // => - // - // %cast = G_BITCAST %vec - // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) - // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx - // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) - // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) - // %elt_bits = G_LSHR %wide_elt, %offset_bits - // %elt = G_TRUNC %elt_bits - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register WideElt = CastVec; - if (CastTy.isVector()) { - WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. - Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - // Shift the wide element to get the target element. - auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); - MIRBuilder.buildTrunc(Dst, ExtractedBits); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p -/// TargetReg, while preserving other bits in \p TargetReg. 
-/// -/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) -static Register buildBitFieldInsert(MachineIRBuilder &B, - Register TargetReg, Register InsertReg, - Register OffsetBits) { - LLT TargetTy = B.getMRI()->getType(TargetReg); - LLT InsertTy = B.getMRI()->getType(InsertReg); - auto ZextVal = B.buildZExt(TargetTy, InsertReg); - auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); - - // Produce a bitmask of the value to insert - auto EltMask = B.buildConstant( - TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), - InsertTy.getSizeInBits())); - // Shift it into position - auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); - auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); - - // Clear out the bits in the wide element - auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); - - // The value to insert has all zeros already, so stick it into the masked - // wide element. - return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); -} - -/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this -/// is increasing the element size, perform the indexing in the target element -/// type, and use bit operations to insert at the element position. This is -/// intended for architectures that can dynamically index the register file and -/// want to force indexing in the native register size. -LegalizerHelper::LegalizeResult -LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, - LLT CastTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Val = MI.getOperand(2).getReg(); - Register Idx = MI.getOperand(3).getReg(); - - LLT VecTy = MRI.getType(Dst); - LLT IdxTy = MRI.getType(Idx); - - LLT VecEltTy = VecTy.getElementType(); - LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; - const unsigned NewEltSize = NewEltTy.getSizeInBits(); - const unsigned OldEltSize = VecEltTy.getSizeInBits(); - - unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; - unsigned OldNumElts = VecTy.getNumElements(); - - Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); - if (NewNumElts < OldNumElts) { - if (NewEltSize % OldEltSize != 0) - return UnableToLegalize; - - // This only depends on powers of 2 because we use bit tricks to figure out - // the bit offset we need to shift to get the target element. A general - // expansion could emit division/multiply. - if (!isPowerOf2_32(NewEltSize / OldEltSize)) - return UnableToLegalize; - - const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); - auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); - - // Divide to get the index in the wider element type. - auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); - - Register ExtractedElt = CastVec; - if (CastTy.isVector()) { - ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, - ScaledIdx).getReg(0); - } - - // Compute the bit offset into the register of the target element. 
- Register OffsetBits = getBitcastWiderVectorElementOffset( - MIRBuilder, Idx, NewEltSize, OldEltSize); - - Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, - Val, OffsetBits); - if (CastTy.isVector()) { - InsertedElt = MIRBuilder.buildInsertVectorElement( - CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); - } - - MIRBuilder.buildBitcast(Dst, InsertedElt); - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerLoad(MachineInstr &MI) { - // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); - - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); - return Legalized; - } - - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case 
TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; - } - - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStore(MachineInstr &MI) { - // Lower a non-power of 2 store into multiple pow-2 stores. - // E.g. split an i24 store into an i16 store + i8 store. - // We do this by first extending the stored value to the next largest power - // of 2 type, and then using truncating stores to store the components. - // By doing this, likewise with G_LOAD, generate an extend that can be - // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; - if (SrcTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Extend to the next pow-2. - const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - - // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - - // Generate the PtrAdd and truncating stores. - LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = - MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { @@ -2833,24 +2833,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: - return bitcastExtractVectorElt(MI, TypeIdx, CastTy); - case TargetOpcode::G_INSERT_VECTOR_ELT: - return bitcastInsertVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return bitcastExtractVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_INSERT_VECTOR_ELT: + return bitcastInsertVectorElt(MI, TypeIdx, CastTy); default: return UnableToLegalize; } } -// Legalize an instruction by changing the opcode in place. -void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { - Observer.changingInstr(MI); - MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); - Observer.changedInstr(MI); -} - +// Legalize an instruction by changing the opcode in place. 
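The lowerLoad/lowerStore pair above splits a non-power-of-2 access into a large power-of-2 piece plus the remainder (for i24: LargeSplitSize = 16, SmallSplitSize = 8). A standalone sketch of the same split on a 3-byte buffer in host byte order; the 24-bit width is an assumption matching the i24 example in the comments.

#include <cassert>
#include <cstdint>
#include <cstring>

// Store a 24-bit value as a 16-bit piece plus an 8-bit piece.
static void store24(uint8_t *P, uint32_t V) {
  const uint16_t Large = static_cast<uint16_t>(V);     // low 16 bits
  const uint8_t Small = static_cast<uint8_t>(V >> 16); // value >> LargeSplitSize
  std::memcpy(P, &Large, 2);                           // truncating store, large piece
  std::memcpy(P + 2, &Small, 1);                       // store at ptr + LargeSplitSize/8
}

// Read it back: zext-load the pieces, shift the small one up, OR, truncate.
static uint32_t load24(const uint8_t *P) {
  uint16_t Large = 0;
  uint8_t Small = 0;
  std::memcpy(&Large, P, 2);
  std::memcpy(&Small, P + 2, 1);
  return uint32_t(Large) | (uint32_t(Small) << 16);
}

int main() {
  uint8_t Buf[3] = {};
  store24(Buf, 0xABCDEF);
  assert(load24(Buf) == 0xABCDEF);
  return 0;
}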
+void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { + Observer.changingInstr(MI); + MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); + Observer.changedInstr(MI); +} + LegalizerHelper::LegalizeResult -LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { using namespace TargetOpcode; switch(MI.getOpcode()) { @@ -2860,7 +2860,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerBitcast(MI); case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); auto Quot = MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty}, {MI.getOperand(1), MI.getOperand(2)}); @@ -2873,9 +2873,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_SADDO: case TargetOpcode::G_SSUBO: return lowerSADDO_SSUBO(MI); - case TargetOpcode::G_UMULH: - case TargetOpcode::G_SMULH: - return lowerSMULH_UMULH(MI); + case TargetOpcode::G_UMULH: + case TargetOpcode::G_SMULH: + return lowerSMULH_UMULH(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -2884,7 +2884,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { Register Overflow = MI.getOperand(1).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); - LLT Ty = MRI.getType(Res); + LLT Ty = MRI.getType(Res); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO ? TargetOpcode::G_SMULH @@ -2914,24 +2914,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case TargetOpcode::G_FNEG: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // TODO: Handle vector types once we are able to // represent them. if (Ty.isVector()) return UnableToLegalize; - auto SignMask = - MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); + auto SignMask = + MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); Register SubByReg = MI.getOperand(1).getReg(); - MIRBuilder.buildXor(Res, SubByReg, SignMask); + MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FSUB: { - Register Res = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Res); - + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). // First, check if G_FNEG is marked as Lower. If so, we may // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. @@ -2951,12 +2951,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { - // Since round even is the assumed rounding mode for unconstrained FP - // operations, rint and roundeven are the same operation. - changeOpcode(MI, TargetOpcode::G_FRINT); - return Legalized; - } + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + // Since round even is the assumed rounding mode for unconstrained FP + // operations, rint and roundeven are the same operation. 
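Several cases in the lower() switch above rewrite one operation in terms of cheaper ones; the G_SREM/G_UREM case, for instance, recovers the remainder from a division and a multiply. A hedged scalar sketch of that identity (the 32-bit type is an assumption):

#include <cassert>
#include <cstdint>

// rem = lhs - (lhs / rhs) * rhs, mirroring the G_SDIV + G_MUL + G_SUB sequence.
static int32_t loweredSRem(int32_t LHS, int32_t RHS) {
  const int32_t Quot = LHS / RHS;  // G_SDIV
  const int32_t Prod = Quot * RHS; // G_MUL
  return LHS - Prod;               // G_SUB
}

int main() {
  assert(loweredSRem(17, 5) == 17 % 5);
  assert(loweredSRem(-17, 5) == -17 % 5);
  assert(loweredSRem(17, -5) == 17 % -5);
  return 0;
}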
+ changeOpcode(MI, TargetOpcode::G_FRINT); + return Legalized; + } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2971,16 +2971,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return lowerLoad(MI); - case TargetOpcode::G_STORE: - return lowerStore(MI); + case TargetOpcode::G_ZEXTLOAD: + return lowerLoad(MI); + case TargetOpcode::G_STORE: + return lowerStore(MI); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: - return lowerBitCount(MI); + return lowerBitCount(MI); case G_UADDO: { Register Res = MI.getOperand(0).getReg(); Register CarryOut = MI.getOperand(1).getReg(); @@ -3042,24 +3042,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return Legalized; } case G_UITOFP: - return lowerUITOFP(MI); + return lowerUITOFP(MI); case G_SITOFP: - return lowerSITOFP(MI); + return lowerSITOFP(MI); case G_FPTOUI: - return lowerFPTOUI(MI); + return lowerFPTOUI(MI); case G_FPTOSI: return lowerFPTOSI(MI); case G_FPTRUNC: - return lowerFPTRUNC(MI); - case G_FPOWI: - return lowerFPOWI(MI); + return lowerFPTRUNC(MI); + case G_FPOWI: + return lowerFPOWI(MI); case G_SMIN: case G_SMAX: case G_UMIN: case G_UMAX: - return lowerMinMax(MI); + return lowerMinMax(MI); case G_FCOPYSIGN: - return lowerFCopySign(MI); + return lowerFCopySign(MI); case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); @@ -3082,9 +3082,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.eraseFromParent(); return Legalized; } - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return lowerExtractInsertVectorElt(MI); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return lowerExtractInsertVectorElt(MI); case G_SHUFFLE_VECTOR: return lowerShuffleVector(MI); case G_DYN_STACKALLOC: @@ -3100,123 +3100,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_READ_REGISTER: case G_WRITE_REGISTER: return lowerReadWriteRegister(MI); - case G_UADDSAT: - case G_USUBSAT: { - // Try to make a reasonable guess about which lowering strategy to use. The - // target can override this with custom lowering and calling the - // implementation functions. - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SADDSAT: - case G_SSUBSAT: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - - // FIXME: It would probably make more sense to see if G_SADDO is preferred, - // since it's a shorter expansion. However, we would need to figure out the - // preferred boolean type for the carry out for the query. 
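The add/sub-saturate cases above pick between a min/max expansion and an overflow-flag expansion. A minimal sketch of the overflow-based shape for unsigned saturating add (clamp to the maximum when the carry is set); the 32-bit width is an assumption.

#include <cassert>
#include <cstdint>
#include <limits>

// uaddsat(a, b): add, and clamp to the maximum value if the add wrapped,
// i.e. the select-on-carry shape produced by lowering through G_UADDO.
static uint32_t uaddSat(uint32_t A, uint32_t B) {
  const uint32_t Sum = A + B;  // G_UADDO result (unsigned wrap is well defined)
  const bool Carry = Sum < A;  // G_UADDO carry-out
  return Carry ? std::numeric_limits<uint32_t>::max() : Sum; // G_SELECT
}

int main() {
  assert(uaddSat(10, 20) == 30);
  assert(uaddSat(0xFFFFFFF0u, 0x100u) == 0xFFFFFFFFu);
  return 0;
}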
- if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) - return lowerAddSubSatToMinMax(MI); - return lowerAddSubSatToAddoSubo(MI); - } - case G_SSHLSAT: - case G_USHLSAT: - return lowerShlSat(MI); - case G_ABS: { - // Expand %res = G_ABS %a into: - // %v1 = G_ASHR %a, scalar_size-1 - // %v2 = G_ADD %a, %v1 - // %res = G_XOR %v2, %v1 - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - Register OpReg = MI.getOperand(1).getReg(); - auto ShiftAmt = - MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); - auto Shift = - MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); - auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); - MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); - MI.eraseFromParent(); - return Legalized; - } - case G_SELECT: - return lowerSelect(MI); - } + case G_UADDSAT: + case G_USUBSAT: { + // Try to make a reasonable guess about which lowering strategy to use. The + // target can override this with custom lowering and calling the + // implementation functions. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (LI.isLegalOrCustom({G_UMIN, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SADDSAT: + case G_SSUBSAT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + // FIXME: It would probably make more sense to see if G_SADDO is preferred, + // since it's a shorter expansion. However, we would need to figure out the + // preferred boolean type for the carry out for the query. + if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SSHLSAT: + case G_USHLSAT: + return lowerShlSat(MI); + case G_ABS: { + // Expand %res = G_ABS %a into: + // %v1 = G_ASHR %a, scalar_size-1 + // %v2 = G_ADD %a, %v1 + // %res = G_XOR %v2, %v1 + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register OpReg = MI.getOperand(1).getReg(); + auto ShiftAmt = + MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); + auto Shift = + MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); + auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); + MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); + MI.eraseFromParent(); + return Legalized; + } + case G_SELECT: + return lowerSelect(MI); + } +} + +Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, + Align MinAlign) const { + // FIXME: We're missing a way to go back from LLT to llvm::Type to query the + // datalayout for the preferred alignment. Also there should be a target hook + // for this to allow targets to reduce the alignment and ignore the + // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of + // the type. 
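The G_ABS expansion shown in the switch above (ashr by size-1, add, xor) is the classic branch-free absolute value. A standalone sketch on 32-bit integers; it assumes the input is not INT32_MIN, since that case would overflow the signed add in C++ even though the wrapping MIR expansion simply maps it to itself.

#include <cassert>
#include <cstdint>

// %v1  = G_ASHR %a, 31   ; all ones if negative, zero otherwise
// %v2  = G_ADD  %a, %v1
// %res = G_XOR  %v2, %v1
static int32_t branchFreeAbs(int32_t A) {
  const int32_t Sign = A >> 31; // arithmetic shift of the sign bit
  return (A + Sign) ^ Sign;     // conditional negate without a branch
}

int main() {
  assert(branchFreeAbs(5) == 5);
  assert(branchFreeAbs(-5) == 5);
  assert(branchFreeAbs(0) == 0);
  return 0;
}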
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); +} + +MachineInstrBuilder +LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, + MachinePointerInfo &PtrInfo) { + MachineFunction &MF = MIRBuilder.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); + + unsigned AddrSpace = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + + PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); + return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); +} + +static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, + LLT VecTy) { + int64_t IdxVal; + if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) + return IdxReg; + + LLT IdxTy = B.getMRI()->getType(IdxReg); + unsigned NElts = VecTy.getNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); + return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); + } + + return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) + .getReg(0); +} + +Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, + Register Index) { + LLT EltTy = VecTy.getElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltTy.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); + + LLT IdxTy = MRI.getType(Index); + auto Mul = MIRBuilder.buildMul(IdxTy, Index, + MIRBuilder.buildConstant(IdxTy, EltSize)); + + LLT PtrTy = MRI.getType(VecPtr); + return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } -Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, - Align MinAlign) const { - // FIXME: We're missing a way to go back from LLT to llvm::Type to query the - // datalayout for the preferred alignment. Also there should be a target hook - // for this to allow targets to reduce the alignment and ignore the - // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of - // the type. 
- return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); -} - -MachineInstrBuilder -LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, - MachinePointerInfo &PtrInfo) { - MachineFunction &MF = MIRBuilder.getMF(); - const DataLayout &DL = MIRBuilder.getDataLayout(); - int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); - - unsigned AddrSpace = DL.getAllocaAddrSpace(); - LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); - - PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); - return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); -} - -static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, - LLT VecTy) { - int64_t IdxVal; - if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) - return IdxReg; - - LLT IdxTy = B.getMRI()->getType(IdxReg); - unsigned NElts = VecTy.getNumElements(); - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); - return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); - } - - return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) - .getReg(0); -} - -Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, - Register Index) { - LLT EltTy = VecTy.getElementType(); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltTy.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); - - LLT IdxTy = MRI.getType(Index); - auto Mul = MIRBuilder.buildMul(IdxTy, Index, - MIRBuilder.buildConstant(IdxTy, EltSize)); - - LLT PtrTy = MRI.getType(VecPtr); - return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); -} - LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); + LLT DstTy = MRI.getType(DstReg); + LLT LCMTy = getLCMType(DstTy, NarrowTy); - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); + auto NewUndef = MIRBuilder.buildUndef(NarrowTy); + SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); MI.eraseFromParent(); return Legalized; } @@ -3337,7 +3337,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); + NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); @@ -3610,116 +3610,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. 
-// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 +// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces +// a vector +// +// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with +// undef as necessary. +// +// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 +// -> <2 x s16> +// +// %4:_(s16) = G_IMPLICIT_DEF +// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 +// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 +// %7:_(<2 x s16>) = G_IMPLICIT_DEF +// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 +// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { +LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - - // Break into a common type - SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); - - // Build the requested new merge, padding with undef. - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); - - // Pack into the original result register. - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, - unsigned TypeIdx, - LLT NarrowVecTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; - - assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); - if (IsInsert) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - // TODO: Handle total scalarization case. - if (!NarrowVecTy.isVector()) - return UnableToLegalize; - - LLT VecTy = MRI.getType(SrcVec); - - // If the index is a constant, we can really break this down as you would - // expect, and index into the target size pieces. - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - // Avoid out of bounds indexing the pieces. - if (IdxVal >= VecTy.getNumElements()) { - MIRBuilder.buildUndef(DstReg); - MI.eraseFromParent(); - return Legalized; + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + + // Break into a common type + SmallVector<Register, 16> Parts; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + + // Build the requested new merge, padding with undef. + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, + TargetOpcode::G_ANYEXT); + + // Pack into the original result register. 
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowVecTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; + + assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); + if (IsInsert) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + // TODO: Handle total scalarization case. + if (!NarrowVecTy.isVector()) + return UnableToLegalize; + + LLT VecTy = MRI.getType(SrcVec); + + // If the index is a constant, we can really break this down as you would + // expect, and index into the target size pieces. + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + // Avoid out of bounds indexing the pieces. + if (IdxVal >= VecTy.getNumElements()) { + MIRBuilder.buildUndef(DstReg); + MI.eraseFromParent(); + return Legalized; } - SmallVector<Register, 8> VecParts; - LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); - - // Build a sequence of NarrowTy pieces in VecParts for this operand. - LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, - TargetOpcode::G_ANYEXT); - - unsigned NewNumElts = NarrowVecTy.getNumElements(); - - LLT IdxTy = MRI.getType(Idx); - int64_t PartIdx = IdxVal / NewNumElts; - auto NewIdx = - MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); - - if (IsInsert) { - LLT PartTy = MRI.getType(VecParts[PartIdx]); - - // Use the adjusted index to insert into one of the subvectors. - auto InsertPart = MIRBuilder.buildInsertVectorElement( - PartTy, VecParts[PartIdx], InsertVal, NewIdx); - VecParts[PartIdx] = InsertPart.getReg(0); - - // Recombine the inserted subvector with the others to reform the result - // vector. - buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); - } else { - MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); - } - - MI.eraseFromParent(); - return Legalized; - } - - // With a variable index, we can't perform the operation in a smaller type, so - // we're forced to expand this. - // - // TODO: We could emit a chain of compare/select to figure out which piece to - // index. - return lowerExtractInsertVectorElt(MI); + SmallVector<Register, 8> VecParts; + LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); + + // Build a sequence of NarrowTy pieces in VecParts for this operand. + LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); + + unsigned NewNumElts = NarrowVecTy.getNumElements(); + + LLT IdxTy = MRI.getType(Idx); + int64_t PartIdx = IdxVal / NewNumElts; + auto NewIdx = + MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); + + if (IsInsert) { + LLT PartTy = MRI.getType(VecParts[PartIdx]); + + // Use the adjusted index to insert into one of the subvectors. + auto InsertPart = MIRBuilder.buildInsertVectorElement( + PartTy, VecParts[PartIdx], InsertVal, NewIdx); + VecParts[PartIdx] = InsertPart.getReg(0); + + // Recombine the inserted subvector with the others to reform the result + // vector. 
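For the constant-index path above, the index is simply decomposed into a piece number and a lane within that piece; a plain-arithmetic sketch with illustrative names:

```cpp
#include <cstdint>
#include <utility>

// Which NarrowTy subvector holds element IdxVal, and at which lane inside it.
std::pair<int64_t, int64_t> splitVectorIndex(int64_t IdxVal, unsigned NewNumElts) {
  int64_t PartIdx = IdxVal / NewNumElts;
  int64_t LaneIdx = IdxVal - PartIdx * int64_t(NewNumElts);
  return {PartIdx, LaneIdx};
}
// e.g. element 5 of a vector split into <4 x ...> pieces: splitVectorIndex(5, 4) == {1, 1}.
```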
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); + } else { + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + } + + MI.eraseFromParent(); + return Legalized; + } + + // With a variable index, we can't perform the operation in a smaller type, so + // we're forced to expand this. + // + // TODO: We could emit a chain of compare/select to figure out which piece to + // index. + return lowerExtractInsertVectorElt(MI); } LegalizerHelper::LegalizeResult @@ -3765,8 +3765,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, if (NumParts == -1) return UnableToLegalize; - LLT PtrTy = MRI.getType(AddrReg); - const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + LLT PtrTy = MRI.getType(AddrReg); + const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); @@ -3964,7 +3964,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_ADD: case G_SUB: case G_MUL: - case G_PTR_ADD: + case G_PTR_ADD: case G_SMULH: case G_UMULH: case G_FADD: @@ -3988,7 +3988,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FFLOOR: case G_FRINT: case G_INTRINSIC_ROUND: - case G_INTRINSIC_ROUNDEVEN: + case G_INTRINSIC_ROUNDEVEN: case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: @@ -4020,8 +4020,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SHL: case G_LSHR: case G_ASHR: - case G_SSHLSAT: - case G_USHLSAT: + case G_SSHLSAT: + case G_USHLSAT: case G_CTLZ: case G_CTLZ_ZERO_UNDEF: case G_CTTZ: @@ -4052,15 +4052,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: - assert(TypeIdx == 0 && "not a vector type index"); - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_CONCAT_VECTORS: - if (TypeIdx != 1) // TODO: This probably does work as expected already. - return UnableToLegalize; - return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); - case G_EXTRACT_VECTOR_ELT: - case G_INSERT_VECTOR_ELT: - return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); + assert(TypeIdx == 0 && "not a vector type index"); + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_CONCAT_VECTORS: + if (TypeIdx != 1) // TODO: This probably does work as expected already. + return UnableToLegalize; + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -4484,31 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { } LegalizerHelper::LegalizeResult -LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; - - Register Src = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(Src); - - // If all finite floats fit into the narrowed integer type, we can just swap - // out the result type. This is practically only useful for conversions from - // half to at least 16-bits, so just handle the one case. - if (SrcTy.getScalarType() != LLT::scalar(16) || - NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) - return UnableToLegalize; - - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, - IsSigned ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { if (TypeIdx != 1) @@ -4857,9 +4857,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBitCount(MachineInstr &MI) { +LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); - const auto &TII = MIRBuilder.getTII(); + const auto &TII = MIRBuilder.getTII(); auto isSupported = [this](const LegalityQuery &Q) { auto QAction = LI.getAction(Q).Action; return QAction == Legal || QAction == Libcall || QAction == Custom; @@ -4947,15 +4947,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); - auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); + auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); + auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); auto MIBTmp = MIRBuilder.buildAnd( - SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); - if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && - isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { - auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); + SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); + if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && + isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { + auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen, - MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); + MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); MI.eraseFromParent(); return Legalized; } @@ -4964,8 +4964,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { return Legalized; } case TargetOpcode::G_CTPOP: { - Register SrcReg = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(SrcReg); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; @@ -4975,11 +4975,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // B2Count = val - { (val >> 1) & 0x55555555 } // since it gives same result in blocks of 2 with one instruction less. 
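A scalar model of the two bit tricks referenced in the comments above, assuming 32-bit values (the MIR expansion splats the same masks to the full type; the final multiply-and-shift is the usual scalar way to sum the byte counts): count-trailing-zeros via the `~x & (x - 1)` mask, and population count by summing bits in blocks of 2, 4, and 8.

```cpp
#include <cstdint>

unsigned popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // counts per 2-bit block
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // counts per 4-bit block
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // counts per byte
  return (V * 0x01010101u) >> 24;                   // sum of the four byte counts
}

unsigned cttz32(uint32_t X) {
  // All bits below the lowest set bit become ones, then count them
  // (returns 32 for X == 0).
  return popcount32(~X & (X - 1));
}
```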
auto C_1 = B.buildConstant(Ty, 1); - auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); + auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55)); auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0); auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0); - auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); + auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); // In order to get count in blocks of 4 add values from adjacent block of 2. // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 } @@ -5078,7 +5078,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5106,7 +5106,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5152,7 +5152,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -5369,7 +5369,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { +LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -5384,20 +5384,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { return UnableToLegalize; } -// TODO: If RHS is a constant SelectionDAGBuilder expands this into a -// multiplication tree. -LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Dst); - - auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); - MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); - MI.eraseFromParent(); - return Legalized; -} - +// TODO: If RHS is a constant SelectionDAGBuilder expands this into a +// multiplication tree. 
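In scalar terms the G_FPOWI fallback above amounts to converting the integer exponent and deferring to pow (a sketch only; per the TODO, a constant exponent would normally be expanded into a multiplication tree instead).

```cpp
#include <cmath>

double powiLowered(double X, int N) {
  return std::pow(X, static_cast<double>(N));
}
```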
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); + MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -5413,7 +5413,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) { } } -LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5429,7 +5429,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFCopySign(MachineInstr &MI) { +LegalizerHelper::lowerFCopySign(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5651,72 +5651,72 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { return Legalized; } -/// Lower a vector extract or insert by writing the vector to a stack temporary -/// and reloading the element or vector. -/// -/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx -/// => -/// %stack_temp = G_FRAME_INDEX -/// G_STORE %vec, %stack_temp -/// %idx = clamp(%idx, %vec.getNumElements()) -/// %element_ptr = G_PTR_ADD %stack_temp, %idx -/// %dst = G_LOAD %element_ptr +/// Lower a vector extract or insert by writing the vector to a stack temporary +/// and reloading the element or vector. +/// +/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx +/// => +/// %stack_temp = G_FRAME_INDEX +/// G_STORE %vec, %stack_temp +/// %idx = clamp(%idx, %vec.getNumElements()) +/// %element_ptr = G_PTR_ADD %stack_temp, %idx +/// %dst = G_LOAD %element_ptr +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + LLT VecTy = MRI.getType(SrcVec); + LLT EltTy = VecTy.getElementType(); + if (!EltTy.isByteSized()) { // Not implemented. + LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); + return UnableToLegalize; + } + + unsigned EltBytes = EltTy.getSizeInBytes(); + Align VecAlign = getStackTemporaryAlignment(VecTy); + Align EltAlign; + + MachinePointerInfo PtrInfo; + auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), + VecAlign, PtrInfo); + MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); + + // Get the pointer to the element, and be sure not to hit undefined behavior + // if the index is out of bounds. + Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + int64_t Offset = IdxVal * EltBytes; + PtrInfo = PtrInfo.getWithOffset(Offset); + EltAlign = commonAlignment(VecAlign, Offset); + } else { + // We lose information with a variable offset. 
+ EltAlign = getStackTemporaryAlignment(EltTy); + PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); + } + + if (InsertVal) { + // Write the inserted element + MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); + + // Reload the whole vector. + MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); + } else { + MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult -LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register InsertVal; - if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) - InsertVal = MI.getOperand(2).getReg(); - - Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); - - LLT VecTy = MRI.getType(SrcVec); - LLT EltTy = VecTy.getElementType(); - if (!EltTy.isByteSized()) { // Not implemented. - LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); - return UnableToLegalize; - } - - unsigned EltBytes = EltTy.getSizeInBytes(); - Align VecAlign = getStackTemporaryAlignment(VecTy); - Align EltAlign; - - MachinePointerInfo PtrInfo; - auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), - VecAlign, PtrInfo); - MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); - - // Get the pointer to the element, and be sure not to hit undefined behavior - // if the index is out of bounds. - Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - - int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { - int64_t Offset = IdxVal * EltBytes; - PtrInfo = PtrInfo.getWithOffset(Offset); - EltAlign = commonAlignment(VecAlign, Offset); - } else { - // We lose information with a variable offset. - EltAlign = getStackTemporaryAlignment(EltTy); - PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); - } - - if (InsertVal) { - // Write the inserted element - MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); - - // Reload the whole vector. 
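A scalar model of the stack-slot lowering documented above: spill the vector to a temporary, clamp the index so the access stays in bounds, then load the element (or store the element and reload the whole vector for the insert case). Plain arrays and illustrative names, not the builder API.

```cpp
#include <algorithm>
#include <array>
#include <cstdint>

uint16_t extractElt(const std::array<uint16_t, 8> &Vec, uint64_t Idx) {
  std::array<uint16_t, 8> Slot = Vec;              // G_STORE of the whole vector
  Idx = std::min<uint64_t>(Idx, Slot.size() - 1);  // clamped element pointer
  return Slot[Idx];                                // G_LOAD of one element
}

std::array<uint16_t, 8> insertElt(std::array<uint16_t, 8> Vec, uint16_t Val,
                                  uint64_t Idx) {
  std::array<uint16_t, 8> Slot = Vec;              // spill
  Idx = std::min<uint64_t>(Idx, Slot.size() - 1);
  Slot[Idx] = Val;                                 // store the inserted element
  return Slot;                                     // reload the whole vector
}
```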
- MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); - } else { - MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); Register Src0Reg = MI.getOperand(1).getReg(); @@ -5931,185 +5931,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - bool IsSigned; - bool IsAdd; - unsigned BaseOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - BaseOp = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - BaseOp = TargetOpcode::G_SUB; - break; - } - - if (IsSigned) { - // sadd.sat(a, b) -> - // hi = 0x7fffffff - smax(a, 0) - // lo = 0x80000000 - smin(a, 0) - // a + smin(smax(lo, b), hi) - // ssub.sat(a, b) -> - // lo = smax(a, -1) - 0x7fffffff - // hi = smin(a, -1) - 0x80000000 - // a - smin(smax(lo, b), hi) - // TODO: AMDGPU can use a "median of 3" instruction here: - // a +/- med3(lo, b, hi) - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto MaxVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - MachineInstrBuilder Hi, Lo; - if (IsAdd) { - auto Zero = MIRBuilder.buildConstant(Ty, 0); - Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); - Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); - } else { - auto NegOne = MIRBuilder.buildConstant(Ty, -1); - Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), - MaxVal); - Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), - MinVal); - } - auto RHSClamped = - MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); - } else { - // uadd.sat(a, b) -> a + umin(~a, b) - // usub.sat(a, b) -> a - umin(a, b) - Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; - auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); - MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - bool IsSigned; - bool IsAdd; - unsigned OverflowOp; - switch (MI.getOpcode()) { - default: - llvm_unreachable("unexpected addsat/subsat opcode"); - case TargetOpcode::G_UADDSAT: - IsSigned = false; - IsAdd = true; - OverflowOp = TargetOpcode::G_UADDO; - break; - case TargetOpcode::G_SADDSAT: - IsSigned = true; - IsAdd = true; - OverflowOp = TargetOpcode::G_SADDO; - break; - case TargetOpcode::G_USUBSAT: - IsSigned = false; - IsAdd = false; - OverflowOp = TargetOpcode::G_USUBO; - break; - case TargetOpcode::G_SSUBSAT: - IsSigned = true; - IsAdd = false; - OverflowOp = TargetOpcode::G_SSUBO; - break; - } - - auto OverflowRes = - MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); - Register Tmp = OverflowRes.getReg(0); - Register Ov = OverflowRes.getReg(1); - MachineInstrBuilder Clamp; - if (IsSigned) { - // sadd.sat(a, b) -> - // {tmp, ov} = saddo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - // ssub.sat(a, b) -> - // {tmp, ov} = ssubo(a, b) - // ov ? (tmp >>s 31) + 0x80000000 : r - uint64_t NumBits = Ty.getScalarSizeInBits(); - auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); - auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); - auto MinVal = - MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); - Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); - } else { - // uadd.sat(a, b) -> - // {tmp, ov} = uaddo(a, b) - // ov ? 0xffffffff : tmp - // usub.sat(a, b) -> - // {tmp, ov} = usubo(a, b) - // ov ? 0 : tmp - Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); - } - MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerShlSat(MachineInstr &MI) { - assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || - MI.getOpcode() == TargetOpcode::G_USHLSAT) && - "Expected shlsat opcode!"); - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; - Register Res = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - LLT Ty = MRI.getType(Res); - LLT BoolTy = Ty.changeElementSize(1); - - unsigned BW = Ty.getScalarSizeInBits(); - auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); - auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) - : MIRBuilder.buildLShr(Ty, Result, RHS); - - MachineInstrBuilder SatVal; - if (IsSigned) { - auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); - auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); - auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, - MIRBuilder.buildConstant(Ty, 0)); - SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); - } else { - SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); - } - auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); - MIRBuilder.buildSelect(Res, Ov, SatVal, Result); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + bool IsSigned; + bool IsAdd; + unsigned BaseOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + } + + if (IsSigned) { + // sadd.sat(a, b) -> + // hi = 0x7fffffff - smax(a, 0) + // lo = 0x80000000 - smin(a, 0) + // a + smin(smax(lo, b), hi) + // ssub.sat(a, b) -> + // lo = smax(a, -1) - 0x7fffffff + // hi = smin(a, -1) - 0x80000000 + // a - smin(smax(lo, b), hi) + // TODO: AMDGPU can use a "median of 3" instruction here: + // a +/- med3(lo, b, hi) + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto MaxVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + MachineInstrBuilder Hi, Lo; + if (IsAdd) { + auto Zero = MIRBuilder.buildConstant(Ty, 0); + Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); + Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); + } else { + auto NegOne = MIRBuilder.buildConstant(Ty, -1); + Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), + MaxVal); + Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), + MinVal); + } + auto RHSClamped = + MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); + } else { + // uadd.sat(a, b) -> a + umin(~a, b) + // usub.sat(a, b) -> a - umin(a, b) + Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; + auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + bool IsSigned; + bool IsAdd; + unsigned OverflowOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + OverflowOp = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + OverflowOp = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + OverflowOp = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + OverflowOp = TargetOpcode::G_SSUBO; + break; + } + + auto OverflowRes = + MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); + Register Tmp = OverflowRes.getReg(0); + Register Ov = OverflowRes.getReg(1); + MachineInstrBuilder Clamp; + if (IsSigned) { + // sadd.sat(a, b) -> + // {tmp, ov} = saddo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + // ssub.sat(a, b) -> + // {tmp, ov} = ssubo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); + auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); + } else { + // uadd.sat(a, b) -> + // {tmp, ov} = uaddo(a, b) + // ov ? 0xffffffff : tmp + // usub.sat(a, b) -> + // {tmp, ov} = usubo(a, b) + // ov ? 0 : tmp + Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); + } + MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShlSat(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT) && + "Expected shlsat opcode!"); + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + + unsigned BW = Ty.getScalarSizeInBits(); + auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); + auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) + : MIRBuilder.buildLShr(Ty, Result, RHS); + + MachineInstrBuilder SatVal; + if (IsSigned) { + auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); + auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); + auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, + MIRBuilder.buildConstant(Ty, 0)); + SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); + } else { + SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); + } + auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); + MIRBuilder.buildSelect(Res, Ov, SatVal, Result); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -6199,7 +6199,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { const MDString *RegStr = cast<MDString>( cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); - Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); + Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); if (!PhysReg.isValid()) return UnableToLegalize; @@ -6211,63 +6211,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } - -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { - bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; - unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; - Register Result = MI.getOperand(0).getReg(); - LLT OrigTy = MRI.getType(Result); - auto SizeInBits = OrigTy.getScalarSizeInBits(); - LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); - - auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); - auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); - auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); - unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; - - auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); - auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); - MIRBuilder.buildTrunc(Result, Shifted); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { - // Implement vector G_SELECT in terms of XOR, AND, OR. - Register DstReg = MI.getOperand(0).getReg(); - Register MaskReg = MI.getOperand(1).getReg(); - Register Op1Reg = MI.getOperand(2).getReg(); - Register Op2Reg = MI.getOperand(3).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT MaskTy = MRI.getType(MaskReg); - LLT Op1Ty = MRI.getType(Op1Reg); - if (!DstTy.isVector()) - return UnableToLegalize; - - // Vector selects can have a scalar predicate. If so, splat into a vector and - // finish for later legalization attempts to try again. - if (MaskTy.isScalar()) { - Register MaskElt = MaskReg; - if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) - MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); - // Generate a vector splat idiom to be pattern matched later. 
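The unsigned saturation identities used above, written out for 32-bit scalars (the signed forms instead clamp the right-hand side between bounds derived from the left-hand side, and the overflow-op variant selects the clamp value on the overflow flag).

```cpp
#include <algorithm>
#include <cstdint>

uint32_t uaddSat(uint32_t A, uint32_t B) {
  return A + std::min<uint32_t>(~A, B);  // ~A is the headroom left before wrapping
}

uint32_t usubSat(uint32_t A, uint32_t B) {
  return A - std::min<uint32_t>(A, B);   // never borrows below zero
}
```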
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(ShufSplat.getReg(0)); - Observer.changedInstr(MI); - return Legalized; - } - - if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { - return UnableToLegalize; - } - - auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); - auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); - auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); - MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); - MI.eraseFromParent(); - return Legalized; -} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; + unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + Register Result = MI.getOperand(0).getReg(); + LLT OrigTy = MRI.getType(Result); + auto SizeInBits = OrigTy.getScalarSizeInBits(); + LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); + + auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); + auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); + auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); + unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); + auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); + MIRBuilder.buildTrunc(Result, Shifted); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { + // Implement vector G_SELECT in terms of XOR, AND, OR. + Register DstReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register Op1Reg = MI.getOperand(2).getReg(); + Register Op2Reg = MI.getOperand(3).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT MaskTy = MRI.getType(MaskReg); + LLT Op1Ty = MRI.getType(Op1Reg); + if (!DstTy.isVector()) + return UnableToLegalize; + + // Vector selects can have a scalar predicate. If so, splat into a vector and + // finish for later legalization attempts to try again. + if (MaskTy.isScalar()) { + Register MaskElt = MaskReg; + if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) + MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); + // Generate a vector splat idiom to be pattern matched later. 
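Per lane, the vector G_SELECT lowering above is the usual bitwise blend, assuming each mask lane is all-ones or all-zeros (a scalar i1 predicate is sign-extended first so a true lane becomes all-ones).

```cpp
#include <cstdint>

uint32_t selectLane(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  return (Mask & Op1) | (~Mask & Op2);
}
```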
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + return UnableToLegalize; + } + + auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); + auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); + auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); + MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + MI.eraseFromParent(); + return Legalized; +} diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 0a5cb26325..30acac14bc 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -105,7 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const { static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q, const std::pair<unsigned, LLT> &Mutation) { switch (Rule.getAction()) { - case Legal: + case Legal: case Custom: case Lower: case MoreElements: @@ -123,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule, std::pair<unsigned, LLT> Mutation) { // If the user wants a custom mutation, then we can't really say much about // it. Return true, and trust that they're doing the right thing. - if (Rule.getAction() == Custom || Rule.getAction() == Legal) + if (Rule.getAction() == Custom || Rule.getAction() == Legal) return true; const unsigned TypeIdx = Mutation.first; @@ -148,8 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule, if (NewTy.getNumElements() <= OldElts) return false; } - } else if (Rule.getAction() == MoreElements) - return false; + } else if (Rule.getAction() == MoreElements) + return false; // Make sure the element type didn't change. 
return NewTy.getScalarType() == OldTy.getScalarType(); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp index 66cff18e91..30c00c63f6 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Localizer.cpp @@ -11,7 +11,7 @@ #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -57,20 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } -bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { - MachineInstr *MI = Op.getParent(); - if (!MI->isPHI()) - return false; - - Register SrcReg = Op.getReg(); - for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { - auto &MO = MI->getOperand(Idx); - if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) - return true; - } - return false; -} - +bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { + MachineInstr *MI = Op.getParent(); + if (!MI->isPHI()) + return false; + + Register SrcReg = Op.getReg(); + for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { + auto &MO = MI->getOperand(Idx); + if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) + return true; + } + return false; +} + bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; @@ -108,14 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedInstrs.insert(&MI); continue; } - - // If the use is a phi operand that's not unique, don't try to localize. - // If we do, we can cause unnecessary instruction bloat by duplicating - // into each predecessor block, when the existing one is sufficient and - // allows for easier optimization later. - if (isNonUniquePhiValue(MOUse)) - continue; - + + // If the use is a phi operand that's not unique, don't try to localize. + // If we do, we can cause unnecessary instruction bloat by duplicating + // into each predecessor block, when the existing one is sufficient and + // allows for easier optimization later. + if (isNonUniquePhiValue(MOUse)) + continue; + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 549bb1a13c..67ef02a4e7 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -9,7 +9,7 @@ /// This file implements the MachineIRBuidler class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) { MIB.addFPImm(CFP); } else { - // Insert $noreg if we didn't find a usable constant and had to drop it. 
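A sketch of the duplicate check behind isNonUniquePhiValue above: the real code walks the PHI's operand list two at a time, since operands come in (value, predecessor) pairs; here the incoming values are modelled as a plain list, with standalone types rather than MachineOperand.

```cpp
#include <vector>

struct PhiIncoming { int Reg; int PredBlock; };

bool isNonUniquePhiValue(const std::vector<PhiIncoming> &Incoming, size_t SelfIdx) {
  int SrcReg = Incoming[SelfIdx].Reg;
  for (size_t I = 0; I != Incoming.size(); ++I)
    if (I != SelfIdx && Incoming[I].Reg == SrcReg)
      return true;  // the same value also flows in from another predecessor
  return false;
}
```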
- MIB.addReg(Register()); + // Insert $noreg if we didn't find a usable constant and had to drop it. + MIB.addReg(Register()); } MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); @@ -162,11 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, .addJumpTableIndex(JTI); } -void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) { - assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); - assert((Res == Op0) && "type mismatch"); -} - +void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) { + assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res == Op0) && "type mismatch"); +} + void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1) { assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); @@ -317,29 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, return buildFConstant(Res, *CFP); } -MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, +MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest) { - assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); + assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); - auto MIB = buildInstr(TargetOpcode::G_BRCOND); - Tst.addSrcToMIB(MIB); - MIB.addMBB(&Dest); - return MIB; + auto MIB = buildInstr(TargetOpcode::G_BRCOND); + Tst.addSrcToMIB(MIB); + MIB.addMBB(&Dest); + return MIB; } -MachineInstrBuilder -MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, - MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags MMOFlags, - const AAMDNodes &AAInfo) { - MMOFlags |= MachineMemOperand::MOLoad; - assert((MMOFlags & MachineMemOperand::MOStore) == 0); - - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); - MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); - return buildLoad(Dst, Addr, *MMO); +MachineInstrBuilder +MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildLoad(Dst, Addr, *MMO); } MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, @@ -386,21 +386,21 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, return MIB; } -MachineInstrBuilder -MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, - MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags MMOFlags, - const AAMDNodes &AAInfo) { - MMOFlags |= MachineMemOperand::MOStore; - assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); - MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); - return buildStore(Val, Addr, *MMO); -} - +MachineInstrBuilder +MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags 
|= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildStore(Val, Addr, *MMO); +} + MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::G_ANYEXT, Res, Op); @@ -635,35 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, - const SrcOp &Src) { - LLT DstTy = Res.getLLTTy(*getMRI()); - assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && - "Expected Src to match Dst elt ty"); - auto UndefVec = buildUndef(DstTy); - auto Zero = buildConstant(LLT::scalar(64), 0); - auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero); - SmallVector<int, 16> ZeroMask(DstTy.getNumElements()); - return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); -} - -MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, - const SrcOp &Src1, - const SrcOp &Src2, - ArrayRef<int> Mask) { - LLT DstTy = Res.getLLTTy(*getMRI()); - LLT Src1Ty = Src1.getLLTTy(*getMRI()); - LLT Src2Ty = Src2.getLLTTy(*getMRI()); - assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); - assert(DstTy.getElementType() == Src1Ty.getElementType() && - DstTy.getElementType() == Src2Ty.getElementType()); - (void)Src1Ty; - (void)Src2Ty; - ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); - return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) - .addShuffleMask(MaskAlloc); -} - +MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, + const SrcOp &Src) { + LLT DstTy = Res.getLLTTy(*getMRI()); + assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + "Expected Src to match Dst elt ty"); + auto UndefVec = buildUndef(DstTy); + auto Zero = buildConstant(LLT::scalar(64), 0); + auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero); + SmallVector<int, 16> ZeroMask(DstTy.getNumElements()); + return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); +} + +MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, + const SrcOp &Src1, + const SrcOp &Src2, + ArrayRef<int> Mask) { + LLT DstTy = Res.getLLTTy(*getMRI()); + LLT Src1Ty = Src1.getLLTTy(*getMRI()); + LLT Src2Ty = Src2.getLLTTy(*getMRI()); + assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); + assert(DstTy.getElementType() == Src1Ty.getElementType() && + DstTy.getElementType() == Src2Ty.getElementType()); + (void)Src1Ty; + (void)Src2Ty; + ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); + return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) + .addShuffleMask(MaskAlloc); +} + MachineInstrBuilder MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) { // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, @@ -986,14 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI())); break; } - case TargetOpcode::G_FNEG: - case TargetOpcode::G_ABS: - // All these are unary ops. 
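What buildShuffleSplat above emits, modelled on plain arrays: put the scalar into lane 0 of an otherwise-undefined vector, then shuffle with an all-zero mask so every result lane reads lane 0. Illustrative sketch only.

```cpp
#include <array>
#include <cstdint>

std::array<uint32_t, 4> splat4(uint32_t X) {
  std::array<uint32_t, 4> Ins{};       // stands in for the undef vector
  Ins[0] = X;                          // insert the scalar at index 0
  std::array<uint32_t, 4> Out{};
  for (int Lane = 0; Lane < 4; ++Lane)
    Out[Lane] = Ins[0];                // zero shuffle mask: every lane picks lane 0
  return Out;
}
```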
- assert(DstOps.size() == 1 && "Invalid Dst"); - assert(SrcOps.size() == 1 && "Invalid Srcs"); - validateUnaryOp(DstOps[0].getLLTTy(*getMRI()), - SrcOps[0].getLLTTy(*getMRI())); - break; + case TargetOpcode::G_FNEG: + case TargetOpcode::G_ABS: + // All these are unary ops. + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + validateUnaryOp(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI())); + break; case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1022,9 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, } case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_USHLSAT: - case TargetOpcode::G_SSHLSAT: { + case TargetOpcode::G_LSHR: + case TargetOpcode::G_USHLSAT: + case TargetOpcode::G_SSHLSAT: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); validateShiftOp(DstOps[0].getLLTTy(*getMRI()), @@ -1089,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_UNMERGE_VALUES: { assert(!DstOps.empty() && "Invalid trivial sequence"); assert(SrcOps.size() == 1 && "Invalid src for Unmerge"); - assert(llvm::all_of(DstOps, - [&, this](const DstOp &Op) { - return Op.getLLTTy(*getMRI()) == - DstOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(DstOps, + [&, this](const DstOp &Op) { + return Op.getLLTTy(*getMRI()) == + DstOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in output list"); assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1103,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_MERGE_VALUES: { assert(!SrcOps.empty() && "invalid trivial sequence"); assert(DstOps.size() == 1 && "Invalid Dst"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1154,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1171,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == 
DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits()) @@ -1186,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert((!SrcOps.empty() || SrcOps.size() < 2) && "Must have at least 2 operands"); - assert(llvm::all_of(SrcOps, - [&, this](const SrcOp &Op) { - return (Op.getLLTTy(*getMRI()).isVector() && - Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI())); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return (Op.getLLTTy(*getMRI()).isVector() && + Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI())); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 05f47915b3..e2a9637471 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -421,7 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { // Then the alternative mapping, if any. InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - append_range(PossibleMappings, AltMappings); + append_range(PossibleMappings, AltMappings); #ifndef NDEBUG for (const InstructionMapping *Mapping : PossibleMappings) assert(Mapping->verify(MI) && "Mapping is invalid"); diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp index 2adc30eacc..cd24832244 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/Utils.cpp @@ -11,11 +11,11 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/Optional.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,16 +23,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "globalisel-utils" using namespace llvm; -using namespace MIPatternMatch; +using namespace MIPatternMatch; Register llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -48,7 +48,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, - const TargetRegisterClass &RegClass, MachineOperand &RegMO) { + const TargetRegisterClass &RegClass, MachineOperand &RegMO) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. 
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -69,13 +69,13 @@ Register llvm::constrainOperandRegClass( TII.get(TargetOpcode::COPY), Reg) .addReg(ConstrainedReg); } - if (GISelChangeObserver *Observer = MF.getObserver()) { - Observer->changingInstr(*RegMO.getParent()); - } - RegMO.setReg(ConstrainedReg); - if (GISelChangeObserver *Observer = MF.getObserver()) { - Observer->changedInstr(*RegMO.getParent()); - } + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changingInstr(*RegMO.getParent()); + } + RegMO.setReg(ConstrainedReg); + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changedInstr(*RegMO.getParent()); + } } else { if (GISelChangeObserver *Observer = MF.getObserver()) { if (!RegMO.isDef()) { @@ -93,7 +93,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - MachineOperand &RegMO, unsigned OpIdx) { + MachineOperand &RegMO, unsigned OpIdx) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -163,7 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, // If the operand is a vreg, we should constrain its regclass, and only // insert COPYs if that's impossible. // constrainOperandRegClass does that for us. - constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI); + constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI); // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been // done. @@ -192,14 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg, bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { - // FIXME: This logical is mostly duplicated with - // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in - // MachineInstr::isLabel? - - // Don't delete frame allocation labels. - if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) - return false; - + // FIXME: This logical is mostly duplicated with + // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in + // MachineInstr::isLabel? + + // Don't delete frame allocation labels. + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) + return false; + // If we can move an instruction, we can remove it. Otherwise, it has // a side-effect of some sort. 
bool SawStore = false; @@ -262,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<APInt> llvm::getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { +Optional<APInt> llvm::getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { Optional<ValueAndVReg> ValAndVReg = getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && @@ -273,17 +273,17 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg, return ValAndVReg->Value; } -Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI) { - Optional<APInt> Val = getConstantVRegVal(VReg, MRI); - if (Val && Val->getBitWidth() <= 64) - return Val->getSExtValue(); - return None; -} - +Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI) { + Optional<APInt> Val = getConstantVRegVal(VReg, MRI); + if (Val && Val->getBitWidth() <= 64) + return Val->getSExtValue(); + return None; +} + Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, - bool HandleFConstant, bool LookThroughAnyExt) { + bool HandleFConstant, bool LookThroughAnyExt) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { @@ -310,10 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && LookThroughInstrs) { switch (MI->getOpcode()) { - case TargetOpcode::G_ANYEXT: - if (!LookThroughAnyExt) - return None; - LLVM_FALLTHROUGH; + case TargetOpcode::G_ANYEXT: + if (!LookThroughAnyExt) + return None; + LLVM_FALLTHROUGH; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: @@ -347,7 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( case TargetOpcode::G_TRUNC: Val = Val.trunc(OpcodeAndSize.second); break; - case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: Val = Val.sext(OpcodeAndSize.second); break; @@ -357,10 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( } } - return ValueAndVReg{Val, VReg}; + return ValueAndVReg{Val, VReg}; } -const ConstantFP * +const ConstantFP * llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) @@ -368,8 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { return MI->getOperand(1).getFPImm(); } -Optional<DefinitionAndSourceRegister> -llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { +Optional<DefinitionAndSourceRegister> +llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); @@ -378,7 +378,7 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { while (DefMI->getOpcode() == TargetOpcode::COPY) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); - if (!SrcTy.isValid()) + if (!SrcTy.isValid()) break; DefMI = MRI.getVRegDef(SrcReg); DefSrcReg = SrcReg; @@ -386,8 +386,8 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { 
return DefinitionAndSourceRegister{DefMI, DefSrcReg}; } -MachineInstr *llvm::getDefIgnoringCopies(Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { Optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); return DefSrcReg ? DefSrcReg->MI : nullptr; @@ -400,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg, return DefSrcReg ? DefSrcReg->Reg : Register(); } -MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, + const MachineRegisterInfo &MRI) { MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI); return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr; } @@ -430,8 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!MaybeOp1Cst) return None; - const APInt &C1 = *MaybeOp1Cst; - const APInt &C2 = *MaybeOp2Cst; + const APInt &C1 = *MaybeOp1Cst; + const APInt &C2 = *MaybeOp2Cst; switch (Opcode) { default: break; @@ -480,8 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - const TargetMachine& TM = DefMI->getMF()->getTarget(); - if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) + const TargetMachine& TM = DefMI->getMF()->getTarget(); + if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; if (SNaN) { @@ -512,40 +512,40 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, return Align(1); } -Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, - const TargetInstrInfo &TII, - MCRegister PhysReg, - const TargetRegisterClass &RC, - LLT RegTy) { - DebugLoc DL; // FIXME: Is no location the right choice? - MachineBasicBlock &EntryMBB = MF.front(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - Register LiveIn = MRI.getLiveInVirtReg(PhysReg); - if (LiveIn) { - MachineInstr *Def = MRI.getVRegDef(LiveIn); - if (Def) { - // FIXME: Should the verifier check this is in the entry block? - assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); - return LiveIn; - } - - // It's possible the incoming argument register and copy was added during - // lowering, but later deleted due to being/becoming dead. If this happens, - // re-insert the copy. - } else { - // The live in register was not present, so add it. - LiveIn = MF.addLiveIn(PhysReg, &RC); - if (RegTy.isValid()) - MRI.setType(LiveIn, RegTy); - } - - BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) - .addReg(PhysReg); - if (!EntryMBB.isLiveIn(PhysReg)) - EntryMBB.addLiveIn(PhysReg); - return LiveIn; -} - +Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, + const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy) { + DebugLoc DL; // FIXME: Is no location the right choice? + MachineBasicBlock &EntryMBB = MF.front(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LiveIn = MRI.getLiveInVirtReg(PhysReg); + if (LiveIn) { + MachineInstr *Def = MRI.getVRegDef(LiveIn); + if (Def) { + // FIXME: Should the verifier check this is in the entry block? + assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); + return LiveIn; + } + + // It's possible the incoming argument register and copy was added during + // lowering, but later deleted due to being/becoming dead. If this happens, + // re-insert the copy. 
+ } else { + // The live in register was not present, so add it. + LiveIn = MF.addLiveIn(PhysReg, &RC); + if (RegTy.isValid()) + MRI.setType(LiveIn, RegTy); + } + + BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) + .addReg(PhysReg); + if (!EntryMBB.isLiveIn(PhysReg)) + EntryMBB.addLiveIn(PhysReg); + return LiveIn; +} + Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { @@ -554,262 +554,262 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, switch (Opcode) { default: break; - case TargetOpcode::G_SEXT_INREG: { - LLT Ty = MRI.getType(Op1); - return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); + case TargetOpcode::G_SEXT_INREG: { + LLT Ty = MRI.getType(Op1); + return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); + } } - } } return None; } -bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, - GISelKnownBits *KB) { - Optional<DefinitionAndSourceRegister> DefSrcReg = - getDefSrcRegIgnoringCopies(Reg, MRI); - if (!DefSrcReg) - return false; - - const MachineInstr &MI = *DefSrcReg->MI; - const LLT Ty = MRI.getType(Reg); - - switch (MI.getOpcode()) { - case TargetOpcode::G_CONSTANT: { - unsigned BitWidth = Ty.getScalarSizeInBits(); - const ConstantInt *CI = MI.getOperand(1).getCImm(); - return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2(); - } - case TargetOpcode::G_SHL: { - // A left-shift of a constant one will have exactly one bit set because - // shifting the bit off the end is undefined. - - // TODO: Constant splat - if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { - if (*ConstLHS == 1) - return true; - } - - break; - } - case TargetOpcode::G_LSHR: { - if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { - if (ConstLHS->isSignMask()) - return true; - } - - break; - } - default: - break; - } - - // TODO: Are all operands of a build vector constant powers of two? - if (!KB) - return false; - - // More could be done here, though the above checks are enough - // to handle some common cases. - - // Fall back to computeKnownBits to catch other known cases. - KnownBits Known = KB->getKnownBits(Reg); - return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); -} - +bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, + GISelKnownBits *KB) { + Optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + if (!DefSrcReg) + return false; + + const MachineInstr &MI = *DefSrcReg->MI; + const LLT Ty = MRI.getType(Reg); + + switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: { + unsigned BitWidth = Ty.getScalarSizeInBits(); + const ConstantInt *CI = MI.getOperand(1).getCImm(); + return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2(); + } + case TargetOpcode::G_SHL: { + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. + + // TODO: Constant splat + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (*ConstLHS == 1) + return true; + } + + break; + } + case TargetOpcode::G_LSHR: { + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (ConstLHS->isSignMask()) + return true; + } + + break; + } + default: + break; + } + + // TODO: Are all operands of a build vector constant powers of two? 
+ if (!KB) + return false; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. + KnownBits Known = KB->getKnownBits(Reg); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } -static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { - unsigned Mul = OrigSize * TargetSize; - unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); - return Mul / GCDSize; -} - -LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) - return OrigTy; - - if (OrigTy.isVector()) { - const LLT OrigElt = OrigTy.getElementType(); - - if (TargetTy.isVector()) { - const LLT TargetElt = TargetTy.getElementType(); - - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - // Prefer the original element type. - int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); - return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); - } - } else { - if (OrigElt.getSizeInBits() == TargetSize) - return OrigTy; - } - - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); +static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { + unsigned Mul = OrigSize * TargetSize; + unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); + return Mul / GCDSize; +} + +LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + const LLT OrigElt = OrigTy.getElementType(); + + if (TargetTy.isVector()) { + const LLT TargetElt = TargetTy.getElementType(); + + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + // Prefer the original element type. + int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); + return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); + } + } else { + if (OrigElt.getSizeInBits() == TargetSize) + return OrigTy; + } + + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); } - if (TargetTy.isVector()) { - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigSize, OrigTy); + if (TargetTy.isVector()) { + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigSize, OrigTy); } - unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - - // Preserve pointer types. 
- if (LCMSize == OrigSize) - return OrigTy; - if (LCMSize == TargetSize) - return TargetTy; - - return LLT::scalar(LCMSize); -} - -LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) - return OrigTy; - - if (OrigTy.isVector()) { - LLT OrigElt = OrigTy.getElementType(); - if (TargetTy.isVector()) { - LLT TargetElt = TargetTy.getElementType(); - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigElt); - } - } else { - // If the source is a vector of pointers, return a pointer element. - if (OrigElt.getSizeInBits() == TargetSize) - return OrigElt; - } - - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); - if (GCD == OrigElt.getSizeInBits()) - return OrigElt; - - // If we can't produce the original element type, we have to use a smaller - // scalar. - if (GCD < OrigElt.getSizeInBits()) - return LLT::scalar(GCD); - return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + + // Preserve pointer types. + if (LCMSize == OrigSize) + return OrigTy; + if (LCMSize == TargetSize) + return TargetTy; + + return LLT::scalar(LCMSize); +} + +LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + LLT OrigElt = OrigTy.getElementType(); + if (TargetTy.isVector()) { + LLT TargetElt = TargetTy.getElementType(); + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigElt); + } + } else { + // If the source is a vector of pointers, return a pointer element. + if (OrigElt.getSizeInBits() == TargetSize) + return OrigElt; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + if (GCD == OrigElt.getSizeInBits()) + return OrigElt; + + // If we can't produce the original element type, we have to use a smaller + // scalar. + if (GCD < OrigElt.getSizeInBits()) + return LLT::scalar(GCD); + return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); + } + + if (TargetTy.isVector()) { + // Try to preserve the original element type. + LLT TargetElt = TargetTy.getElementType(); + if (TargetElt.getSizeInBits() == OrigSize) + return OrigTy; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + return LLT::scalar(GCD); +} + +Optional<int> llvm::getSplatIndex(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Only G_SHUFFLE_VECTOR can have a splat index!"); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); + + // If all elements are undefined, this shuffle can be considered a splat. + // Return 0 for better potential for callers to simplify. + if (FirstDefinedIdx == Mask.end()) + return 0; + + // Make sure all remaining elements are either undef or the same + // as the first non-undef value. 
+ int SplatValue = *FirstDefinedIdx; + if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), + [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) + return None; + + return SplatValue; +} + +static bool isBuildVectorOp(unsigned Opcode) { + return Opcode == TargetOpcode::G_BUILD_VECTOR || + Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; +} + +// TODO: Handle mixed undef elements. +static bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue) { + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) + return false; } - if (TargetTy.isVector()) { - // Try to preserve the original element type. - LLT TargetElt = TargetTy.getElementType(); - if (TargetElt.getSizeInBits() == OrigSize) - return OrigTy; - } - - unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); - return LLT::scalar(GCD); -} - -Optional<int> llvm::getSplatIndex(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Only G_SHUFFLE_VECTOR can have a splat index!"); - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); - - // If all elements are undefined, this shuffle can be considered a splat. - // Return 0 for better potential for callers to simplify. - if (FirstDefinedIdx == Mask.end()) - return 0; - - // Make sure all remaining elements are either undef or the same - // as the first non-undef value. - int SplatValue = *FirstDefinedIdx; - if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), - [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) - return None; - - return SplatValue; -} - -static bool isBuildVectorOp(unsigned Opcode) { - return Opcode == TargetOpcode::G_BUILD_VECTOR || - Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; -} - -// TODO: Handle mixed undef elements. 
-static bool isBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - int64_t SplatValue) { - if (!isBuildVectorOp(MI.getOpcode())) - return false; - - const unsigned NumOps = MI.getNumOperands(); - for (unsigned I = 1; I != NumOps; ++I) { - Register Element = MI.getOperand(I).getReg(); - if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) - return false; + return true; +} + +Optional<int64_t> +llvm::getBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + if (!isBuildVectorOp(MI.getOpcode())) + return None; + + const unsigned NumOps = MI.getNumOperands(); + Optional<int64_t> Scalar; + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + int64_t ElementValue; + if (!mi_match(Element, MRI, m_ICst(ElementValue))) + return None; + if (!Scalar) + Scalar = ElementValue; + else if (*Scalar != ElementValue) + return None; } - return true; -} - -Optional<int64_t> -llvm::getBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - if (!isBuildVectorOp(MI.getOpcode())) - return None; - - const unsigned NumOps = MI.getNumOperands(); - Optional<int64_t> Scalar; - for (unsigned I = 1; I != NumOps; ++I) { - Register Element = MI.getOperand(I).getReg(); - int64_t ElementValue; - if (!mi_match(Element, MRI, m_ICst(ElementValue))) - return None; - if (!Scalar) - Scalar = ElementValue; - else if (*Scalar != ElementValue) - return None; + return Scalar; +} + +bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, 0); +} + +bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, -1); +} + +bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + return Val & 0x1; + case TargetLowering::ZeroOrOneBooleanContent: + return Val == 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return Val == -1; } + llvm_unreachable("Invalid boolean contents"); +} - return Scalar; -} - -bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, 0); -} - -bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { - return isBuildVectorConstantSplat(MI, MRI, -1); -} - -bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, - bool IsFP) { - switch (TLI.getBooleanContents(IsVector, IsFP)) { - case TargetLowering::UndefinedBooleanContent: - return Val & 0x1; - case TargetLowering::ZeroOrOneBooleanContent: - return Val == 1; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - return Val == -1; - } - llvm_unreachable("Invalid boolean contents"); -} - -int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, - bool IsFP) { - switch (TLI.getBooleanContents(IsVector, IsFP)) { - case TargetLowering::UndefinedBooleanContent: - case TargetLowering::ZeroOrOneBooleanContent: - return 1; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - return -1; - } - llvm_unreachable("Invalid boolean contents"); -} +int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrOneBooleanContent: + 
return 1;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return -1;
+  }
+  llvm_unreachable("Invalid boolean contents");
+}
diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
index 6ede6da277..e6de0fe8d9 100644
--- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
+++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make
@@ -12,16 +12,16 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 PEERDIR(
-    contrib/libs/llvm12
-    contrib/libs/llvm12/include
-    contrib/libs/llvm12/lib/Analysis
-    contrib/libs/llvm12/lib/CodeGen
-    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
-    contrib/libs/llvm12/lib/IR
-    contrib/libs/llvm12/lib/MC
-    contrib/libs/llvm12/lib/Support
-    contrib/libs/llvm12/lib/Target
-    contrib/libs/llvm12/lib/Transforms/Utils
+    contrib/libs/llvm12
+    contrib/libs/llvm12/include
+    contrib/libs/llvm12/lib/Analysis
+    contrib/libs/llvm12/lib/CodeGen
+    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+    contrib/libs/llvm12/lib/IR
+    contrib/libs/llvm12/lib/MC
+    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12/lib/Target
+    contrib/libs/llvm12/lib/Transforms/Utils
 )
 
 ADDINCL(