| field | value | date |
|---|---|---|
| author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
| commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
| tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64/GISel | |
| parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
| download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64/GISel')
12 files changed, 2496 insertions(+), 2496 deletions(-)
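Among the restored files is AArch64GlobalISelUtils.h, whose entire content is the `isLegalArithImmed` helper that appears further down in the diff. As a reading aid, here is a minimal standalone sketch of that predicate; the test harness and sample values are illustrative additions and not part of the commit. The predicate accepts a plain 12-bit immediate, or a 12-bit immediate shifted left by 12 bits.

```cpp
// Standalone sketch of the isLegalArithImmed predicate restored in
// AArch64GlobalISelUtils.h. The surrounding harness and the sample
// values below are illustrative only.
#include <cstdint>
#include <cstdio>

constexpr bool isLegalArithImmed(const uint64_t C) {
  // Legal if C fits in 12 bits, or its low 12 bits are zero and the
  // remaining value fits in the next 12 bits (a 12-bit immediate << 12).
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

static_assert(isLegalArithImmed(0xFFF), "plain 12-bit immediate");
static_assert(isLegalArithImmed(0x123000), "12-bit immediate shifted by 12");
static_assert(!isLegalArithImmed(0x1234), "needs more than 12 unshifted bits");
static_assert(!isLegalArithImmed(0x1234000), "shifted value wider than 12 bits");

int main() {
  for (uint64_t C : {0xFFFULL, 0x1000ULL, 0x1001ULL, 0xFFF000ULL, 0x1000000ULL})
    std::printf("0x%llx -> %s\n", (unsigned long long)C,
                isLegalArithImmed(C) ? "legal" : "not legal");
  return 0;
}
```

The diff itself follows.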
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 0f8b1d6584..7b05f70a73 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -52,10 +52,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) : CallLowering(&TLI) {} namespace { -struct IncomingArgHandler : public CallLowering::IncomingValueHandler { +struct IncomingArgHandler : public CallLowering::IncomingValueHandler { IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, CCAssignFn *AssignFn) - : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} + : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -101,7 +101,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { /// How the physical register gets marked varies between formal /// parameters (it's a basic-block live-in), and a call instruction /// (it's an implicit-def of the BL). - virtual void markPhysRegUsed(MCRegister PhysReg) = 0; + virtual void markPhysRegUsed(MCRegister PhysReg) = 0; uint64_t StackUsed; }; @@ -111,7 +111,7 @@ struct FormalArgHandler : public IncomingArgHandler { CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } @@ -122,19 +122,19 @@ struct CallReturnHandler : public IncomingArgHandler { MachineInstrBuilder MIB, CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); } MachineInstrBuilder MIB; }; -struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { +struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg, bool IsTailCall = false, int FPDiff = 0) - : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), + : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), StackSize(0), SPReg(0) {} @@ -187,8 +187,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { if (!Arg.IsFixed) MaxSize = 0; - assert(Arg.Regs.size() == 1); - + assert(Arg.Regs.size() == 1); + Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt ? extendRegister(Arg.Regs[0], VA, MaxSize) : Arg.Regs[0]; @@ -274,7 +274,7 @@ void AArch64CallLowering::splitToValueTypes( bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI, + FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const { auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && @@ -420,7 +420,7 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, // Conservatively forward X8, since it might be used for an aggregate // return. 
if (!CCInfo.isAllocated(AArch64::X8)) { - Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); + Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); } @@ -441,7 +441,7 @@ bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const { bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { + ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { MachineFunction &MF = MIRBuilder.getMF(); MachineBasicBlock &MBB = MIRBuilder.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -623,25 +623,25 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Info.IsVarArg) { - // Be conservative and disallow variadic memory operands to match SDAG's - // behaviour. - // FIXME: If the caller's calling convention is C, then we can - // potentially use its argument area. However, for cases like fastcc, - // we can't do anything. - for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; - if (ArgLoc.isRegLoc()) - continue; + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + if (ArgLoc.isRegLoc()) + continue; LLVM_DEBUG( dbgs() - << "... Cannot tail call vararg function with stack arguments\n"); + << "... Cannot tail call vararg function with stack arguments\n"); return false; } } - return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); + return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); } bool AArch64CallLowering::isEligibleForTailCallOptimization( @@ -756,7 +756,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use // x16 or x17. - if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) + if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) return AArch64::TCRETURNriBTI; return AArch64::TCRETURNri; @@ -776,7 +776,7 @@ bool AArch64CallLowering::lowerTailCall( // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 // register class. Until we can do that, we should fall back here. - if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { + if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { LLVM_DEBUG( dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); return false; @@ -894,9 +894,9 @@ bool AArch64CallLowering::lowerTailCall( // If Callee is a reg, since it is used by a target specific instruction, // it must have a register class matching the constraint of that instruction. 
if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); MF.getFrameInfo().setHasTailCall(); Info.LoweredTailCall = true; @@ -978,9 +978,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // instruction, it must have a register class matching the // constraint of that instruction. if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h index 1f45c9ebc0..8054cf6b99 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -34,14 +34,14 @@ public: AArch64CallLowering(const AArch64TargetLowering &TLI); bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, + ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const override; bool fallBackToDAGISel(const Function &F) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const override; + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h index bed1136c7a..9536f0a596 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h @@ -1,29 +1,29 @@ -//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file APIs for AArch64-specific helper functions used in the GlobalISel -/// pipeline. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H -#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H - -#include <cstdint> - -namespace llvm { -namespace AArch64GISelUtils { - -/// \returns true if \p C is a legal immediate operand for an arithmetic -/// instruction. 
-constexpr bool isLegalArithImmed(const uint64_t C) { - return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); -} - -} // namespace AArch64GISelUtils -} // namespace llvm - -#endif +//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file APIs for AArch64-specific helper functions used in the GlobalISel +/// pipeline. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H +#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H + +#include <cstdint> + +namespace llvm { +namespace AArch64GISelUtils { + +/// \returns true if \p C is a legal immediate operand for an arithmetic +/// instruction. +constexpr bool isLegalArithImmed(const uint64_t C) { + return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); +} + +} // namespace AArch64GISelUtils +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index fc5ef02e84..72f92065f3 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -18,7 +18,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" @@ -34,18 +34,18 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/Pass.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "aarch64-isel" using namespace llvm; -using namespace MIPatternMatch; +using namespace MIPatternMatch; namespace { @@ -103,23 +103,23 @@ private: bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - ///@{ - /// Helper functions for selectCompareBranch. - bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, - MachineIRBuilder &MIB) const; - bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, + ///@{ + /// Helper functions for selectCompareBranch. 
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, + MachineIRBuilder &MIB) const; + bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - ///@} - + ///@} + bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; // Helper to generate an equivalent of scalar_to_vector into a new register, @@ -160,7 +160,7 @@ private: bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; - bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(const Constant *CPVal, MachineFunction &MF) const; @@ -173,72 +173,72 @@ private: MachineIRBuilder &MIRBuilder) const; // Emit an integer compare between LHS and RHS, which checks for Predicate. - MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; - - /// Emit a floating point comparison between \p LHS and \p RHS. - /// \p Pred if given is the intended predicate to use. - MachineInstr *emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> = None) const; - - MachineInstr *emitInstr(unsigned Opcode, - std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, - MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns = None) const; - /// Helper function to emit an add or sub instruction. - /// - /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above - /// in a specific order. - /// - /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. - /// - /// \code - /// const std::array<std::array<unsigned, 2>, 4> Table { - /// {{AArch64::ADDXri, AArch64::ADDWri}, - /// {AArch64::ADDXrs, AArch64::ADDWrs}, - /// {AArch64::ADDXrr, AArch64::ADDWrr}, - /// {AArch64::SUBXri, AArch64::SUBWri}, - /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; - /// \endcode - /// - /// Each row in the table corresponds to a different addressing mode. Each - /// column corresponds to a different register size. - /// - /// \attention Rows must be structured as follows: - /// - Row 0: The ri opcode variants - /// - Row 1: The rs opcode variants - /// - Row 2: The rr opcode variants - /// - Row 3: The ri opcode variants for negative immediates - /// - Row 4: The rx opcode variants - /// - /// \attention Columns must be structured as follows: - /// - Column 0: The 64-bit opcode variants - /// - Column 1: The 32-bit opcode variants - /// - /// \p Dst is the destination register of the binop to emit. - /// \p LHS is the left-hand operand of the binop to emit. - /// \p RHS is the right-hand operand of the binop to emit. 
- MachineInstr *emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, - MachineOperand &RHS, + MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; + + /// Emit a floating point comparison between \p LHS and \p RHS. + /// \p Pred if given is the intended predicate to use. + MachineInstr *emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> = None) const; + + MachineInstr *emitInstr(unsigned Opcode, + std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, + MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns = None) const; + /// Helper function to emit an add or sub instruction. + /// + /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above + /// in a specific order. + /// + /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. + /// + /// \code + /// const std::array<std::array<unsigned, 2>, 4> Table { + /// {{AArch64::ADDXri, AArch64::ADDWri}, + /// {AArch64::ADDXrs, AArch64::ADDWrs}, + /// {AArch64::ADDXrr, AArch64::ADDWrr}, + /// {AArch64::SUBXri, AArch64::SUBWri}, + /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; + /// \endcode + /// + /// Each row in the table corresponds to a different addressing mode. Each + /// column corresponds to a different register size. + /// + /// \attention Rows must be structured as follows: + /// - Row 0: The ri opcode variants + /// - Row 1: The rs opcode variants + /// - Row 2: The rr opcode variants + /// - Row 3: The ri opcode variants for negative immediates + /// - Row 4: The rx opcode variants + /// + /// \attention Columns must be structured as follows: + /// - Column 0: The 64-bit opcode variants + /// - Column 1: The 32-bit opcode variants + /// + /// \p Dst is the destination register of the binop to emit. + /// \p LHS is the left-hand operand of the binop to emit. + /// \p RHS is the right-hand operand of the binop to emit. 
+ MachineInstr *emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, + MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, - AArch64CC::CondCode CC, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, + AArch64CC::CondCode CC, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, @@ -250,25 +250,25 @@ private: MachineInstr *emitFMovForFConstant(MachineInstr &MI, MachineRegisterInfo &MRI) const; - /// Emit a CSet for an integer compare. - /// - /// \p DefReg is expected to be a 32-bit scalar register. + /// Emit a CSet for an integer compare. + /// + /// \p DefReg is expected to be a 32-bit scalar register. MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const; - /// Emit a CSet for a FP compare. - /// - /// \p Dst is expected to be a 32-bit scalar register. - MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, - MachineIRBuilder &MIRBuilder) const; - - /// Emit the overflow op for \p Opcode. - /// - /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, - /// G_USUBO, etc. - std::pair<MachineInstr *, AArch64CC::CondCode> - emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, - MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - + /// Emit a CSet for a FP compare. + /// + /// \p Dst is expected to be a 32-bit scalar register. + MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, + MachineIRBuilder &MIRBuilder) const; + + /// Emit the overflow op for \p Opcode. + /// + /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, + /// G_USUBO, etc. + std::pair<MachineInstr *, AArch64CC::CondCode> + emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// \p IsNegative is true if the test should be "not zero". /// This will also optimize the test bit instruction when possible. @@ -276,11 +276,11 @@ private: MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - /// Emit a CB(N)Z instruction which branches to \p DestMBB. - MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const; - + /// Emit a CB(N)Z instruction which branches to \p DestMBB. 
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const; + // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. // We use these manually instead of using the importer since it doesn't // support SDNodeXForm. @@ -577,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); if (!ValAndVReg) return None; - Immed = ValAndVReg->Value.getSExtValue(); + Immed = ValAndVReg->Value.getSExtValue(); } else return None; return Immed; @@ -865,7 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, #ifndef NDEBUG ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); assert(ValidCopy && "Invalid copy."); - (void)KnownValid; + (void)KnownValid; #endif return ValidCopy; }; @@ -1012,173 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { return GenericOpc; } -MachineInstr * -AArch64InstructionSelector::emitSelect(Register Dst, Register True, - Register False, AArch64CC::CondCode CC, - MachineIRBuilder &MIB) const { - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(False, MRI, TRI)->getID() == - RBI.getRegBank(True, MRI, TRI)->getID() && - "Expected both select operands to have the same regbank?"); - LLT Ty = MRI.getType(True); - if (Ty.isVector()) - return nullptr; - const unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && - "Expected 32 bit or 64 bit select only?"); - const bool Is32Bit = Size == 32; - if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { - unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; - auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); - return &*FCSel; - } - - // By default, we'll try and emit a CSEL. - unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; - bool Optimized = false; - auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, - &Optimized](Register &Reg, Register &OtherReg, - bool Invert) { - if (Optimized) - return false; - - // Attempt to fold: - // - // %sub = G_SUB 0, %x - // %select = G_SELECT cc, %reg, %sub - // - // Into: - // %select = CSNEG %reg, %x, cc - Register MatchReg; - if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %xor = G_XOR %x, -1 - // %select = G_SELECT cc, %reg, %xor - // - // Into: - // %select = CSINV %reg, %x, cc - if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %add = G_ADD %x, 1 - // %select = G_SELECT cc, %reg, %add - // - // Into: - // %select = CSINC %reg, %x, cc - if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { - Opc = Is32Bit ? 
AArch64::CSINCWr : AArch64::CSINCXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - +MachineInstr * +AArch64InstructionSelector::emitSelect(Register Dst, Register True, + Register False, AArch64CC::CondCode CC, + MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(False, MRI, TRI)->getID() == + RBI.getRegBank(True, MRI, TRI)->getID() && + "Expected both select operands to have the same regbank?"); + LLT Ty = MRI.getType(True); + if (Ty.isVector()) + return nullptr; + const unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "Expected 32 bit or 64 bit select only?"); + const bool Is32Bit = Size == 32; + if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { + unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; + auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); + return &*FCSel; + } + + // By default, we'll try and emit a CSEL. + unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; + bool Optimized = false; + auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, + &Optimized](Register &Reg, Register &OtherReg, + bool Invert) { + if (Optimized) + return false; + + // Attempt to fold: + // + // %sub = G_SUB 0, %x + // %select = G_SELECT cc, %reg, %sub + // + // Into: + // %select = CSNEG %reg, %x, cc + Register MatchReg; + if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %xor = G_XOR %x, -1 + // %select = G_SELECT cc, %reg, %xor + // + // Into: + // %select = CSINV %reg, %x, cc + if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %add = G_ADD %x, 1 + // %select = G_SELECT cc, %reg, %add + // + // Into: + // %select = CSINC %reg, %x, cc + if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + return false; - }; - - // Helper lambda which tries to use CSINC/CSINV for the instruction when its - // true/false values are constants. - // FIXME: All of these patterns already exist in tablegen. We should be - // able to import these. - auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, - &Optimized]() { - if (Optimized) - return false; - auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); - auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); - if (!TrueCst && !FalseCst) - return false; - - Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; - if (TrueCst && FalseCst) { - int64_t T = TrueCst->Value.getSExtValue(); - int64_t F = FalseCst->Value.getSExtValue(); - - if (T == 0 && F == 1) { - // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = ZReg; - False = ZReg; - return true; - } - - if (T == 0 && F == -1) { - // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc - Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; - True = ZReg; - False = ZReg; - return true; - } - } - - if (TrueCst) { - int64_t T = TrueCst->Value.getSExtValue(); - if (T == 1) { - // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - - if (T == -1) { - // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - } - - if (FalseCst) { - int64_t F = FalseCst->Value.getSExtValue(); - if (F == 1) { - // G_SELECT cc, t, 1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - False = ZReg; - return true; - } - - if (F == -1) { - // G_SELECT cc, t, -1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - False = ZReg; - return true; - } - } - return false; - }; - - Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); - Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); - Optimized |= TryOptSelectCst(); - auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); - return &*SelectInst; + }; + + // Helper lambda which tries to use CSINC/CSINV for the instruction when its + // true/false values are constants. + // FIXME: All of these patterns already exist in tablegen. We should be + // able to import these. + auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, + &Optimized]() { + if (Optimized) + return false; + auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); + auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); + if (!TrueCst && !FalseCst) + return false; + + Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; + if (TrueCst && FalseCst) { + int64_t T = TrueCst->Value.getSExtValue(); + int64_t F = FalseCst->Value.getSExtValue(); + + if (T == 0 && F == 1) { + // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = ZReg; + False = ZReg; + return true; + } + + if (T == 0 && F == -1) { + // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = ZReg; + False = ZReg; + return true; + } + } + + if (TrueCst) { + int64_t T = TrueCst->Value.getSExtValue(); + if (T == 1) { + // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + + if (T == -1) { + // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + } + + if (FalseCst) { + int64_t F = FalseCst->Value.getSExtValue(); + if (F == 1) { + // G_SELECT cc, t, 1 -> CSINC t, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + False = ZReg; + return true; + } + + if (F == -1) { + // G_SELECT cc, t, -1 -> CSINC t, zreg, cc + Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; + False = ZReg; + return true; + } + } + return false; + }; + + Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); + Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); + Optimized |= TryOptSelectCst(); + auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); + return &*SelectInst; } static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { @@ -1308,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); } if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } case TargetOpcode::G_ASHR: @@ -1318,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, auto VRegAndVal = getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } } @@ -1420,9 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit( } bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( - MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, - MachineIRBuilder &MIB) const { - assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); + MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, + MachineIRBuilder &MIB) const { + assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); // Given something like this: // // %x = ...Something... @@ -1444,92 +1444,92 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( // Check if the AND has a constant on its RHS which we can use as a mask. // If it's a power of 2, then it's the same as checking a specific bit. // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) - auto MaybeBit = getConstantVRegValWithLookThrough( - AndInst.getOperand(2).getReg(), *MIB.getMRI()); - if (!MaybeBit) + auto MaybeBit = getConstantVRegValWithLookThrough( + AndInst.getOperand(2).getReg(), *MIB.getMRI()); + if (!MaybeBit) return false; - int32_t Bit = MaybeBit->Value.exactLogBase2(); - if (Bit < 0) - return false; - - Register TestReg = AndInst.getOperand(1).getReg(); + int32_t Bit = MaybeBit->Value.exactLogBase2(); + if (Bit < 0) + return false; + Register TestReg = AndInst.getOperand(1).getReg(); + // Emit a TB(N)Z. 
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); return true; } -MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, - bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const { - assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == - AArch64::GPRRegBankID && - "Expected GPRs only?"); - auto Ty = MRI.getType(CompareReg); - unsigned Width = Ty.getSizeInBits(); - assert(!Ty.isVector() && "Expected scalar only?"); - assert(Width <= 64 && "Expected width to be at most 64?"); - static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, - {AArch64::CBNZW, AArch64::CBNZX}}; - unsigned Opc = OpcTable[IsNegative][Width == 64]; - auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); - constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); - return &*BranchMI; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( - MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { - assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't - // totally clean. Some of them require two branches to implement. - auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); - emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, - Pred); - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); +MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, + bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const { + assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == + AArch64::GPRRegBankID && + "Expected GPRs only?"); + auto Ty = MRI.getType(CompareReg); + unsigned Width = Ty.getSizeInBits(); + assert(!Ty.isVector() && "Expected scalar only?"); + assert(Width <= 64 && "Expected width to be at most 64?"); + static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, + {AArch64::CBNZW, AArch64::CBNZX}}; + unsigned Opc = OpcTable[IsNegative][Width == 64]; + auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); + constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); + return &*BranchMI; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( + MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { + assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two branches to implement. 
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); + emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, + Pred); + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); - if (CC2 != AArch64CC::AL) - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. - // - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (!ProduceNonFlagSettingCondBr) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. + // + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (!ProduceNonFlagSettingCondBr) return false; - MachineRegisterInfo &MRI = *MIB.getMRI(); - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto Pred = - static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); - Register LHS = ICmp.getOperand(2).getReg(); - Register RHS = ICmp.getOperand(3).getReg(); - - // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. + MachineRegisterInfo &MRI = *MIB.getMRI(); + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto Pred = + static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); + Register LHS = ICmp.getOperand(2).getReg(); + Register RHS = ICmp.getOperand(3).getReg(); + + // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); // When we can emit a TB(N)Z, prefer that. // // Handle non-commutative condition codes first. // Note that we don't want to do this when we have a G_AND because it can // become a tst. The tst will make the test bit in the TB(N)Z redundant. - if (VRegAndVal && !AndInst) { - int64_t C = VRegAndVal->Value.getSExtValue(); + if (VRegAndVal && !AndInst) { + int64_t C = VRegAndVal->Value.getSExtValue(); // When we have a greater-than comparison, we can just test if the msb is // zero. @@ -1550,97 +1550,97 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( } } - // Attempt to handle commutative condition codes. Right now, that's only - // eq/ne. 
- if (ICmpInst::isEquality(Pred)) { - if (!VRegAndVal) { - std::swap(RHS, LHS); - VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); - } - - if (VRegAndVal && VRegAndVal->Value == 0) { - // If there's a G_AND feeding into this branch, try to fold it away by - // emitting a TB(N)Z instead. - // - // Note: If we have LT, then it *is* possible to fold, but it wouldn't be - // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding - // would be redundant. - if (AndInst && - tryOptAndIntoCompareBranch( - *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { - I.eraseFromParent(); - return true; - } - - // Otherwise, try to emit a CB(N)Z instead. - auto LHSTy = MRI.getType(LHS); - if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { - emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); - I.eraseFromParent(); - return true; - } - } - } - - return false; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) + // Attempt to handle commutative condition codes. Right now, that's only + // eq/ne. + if (ICmpInst::isEquality(Pred)) { + if (!VRegAndVal) { + std::swap(RHS, LHS); + VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); + AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + } + + if (VRegAndVal && VRegAndVal->Value == 0) { + // If there's a G_AND feeding into this branch, try to fold it away by + // emitting a TB(N)Z instead. + // + // Note: If we have LT, then it *is* possible to fold, but it wouldn't be + // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding + // would be redundant. + if (AndInst && + tryOptAndIntoCompareBranch( + *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { + I.eraseFromParent(); + return true; + } + + // Otherwise, try to emit a CB(N)Z instead. + auto LHSTy = MRI.getType(LHS); + if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { + emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); + I.eraseFromParent(); + return true; + } + } + } + + return false; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) return true; - - // Couldn't optimize. Emit a compare + a Bcc. - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto PredOp = ICmp.getOperand(1); - emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( - static_cast<CmpInst::Predicate>(PredOp.getPredicate())); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::selectCompareBranch( - MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { - Register CondReg = I.getOperand(0).getReg(); - MachineInstr *CCMI = MRI.getVRegDef(CondReg); - if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { - CondReg = CCMI->getOperand(1).getReg(); - CCMI = MRI.getVRegDef(CondReg); - } - - // Try to select the G_BRCOND using whatever is feeding the condition if - // possible. 
- MachineIRBuilder MIB(I); - unsigned CCMIOpc = CCMI->getOpcode(); - if (CCMIOpc == TargetOpcode::G_FCMP) - return selectCompareBranchFedByFCmp(I, *CCMI, MIB); - if (CCMIOpc == TargetOpcode::G_ICMP) - return selectCompareBranchFedByICmp(I, *CCMI, MIB); - - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (ProduceNonFlagSettingCondBr) { - emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, - I.getOperand(1).getMBB(), MIB); + + // Couldn't optimize. Emit a compare + a Bcc. + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto PredOp = ICmp.getOperand(1); + emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( + static_cast<CmpInst::Predicate>(PredOp.getPredicate())); + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::selectCompareBranch( + MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { + Register CondReg = I.getOperand(0).getReg(); + MachineInstr *CCMI = MRI.getVRegDef(CondReg); + if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { + CondReg = CCMI->getOperand(1).getReg(); + CCMI = MRI.getVRegDef(CondReg); + } + + // Try to select the G_BRCOND using whatever is feeding the condition if + // possible. + MachineIRBuilder MIB(I); + unsigned CCMIOpc = CCMI->getOpcode(); + if (CCMIOpc == TargetOpcode::G_FCMP) + return selectCompareBranchFedByFCmp(I, *CCMI, MIB); + if (CCMIOpc == TargetOpcode::G_ICMP) + return selectCompareBranchFedByICmp(I, *CCMI, MIB); + + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (ProduceNonFlagSettingCondBr) { + emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, + I.getOperand(1).getMBB(), MIB); I.eraseFromParent(); return true; } - // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. - auto TstMI = - MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - auto Bcc = MIB.buildInstr(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addMBB(I.getOperand(1).getMBB()); + // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. + auto TstMI = + MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + auto Bcc = MIB.buildInstr(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addMBB(I.getOperand(1).getMBB()); I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); } /// Returns the element immediate value of a vector shift operand if found. @@ -1661,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg, return None; if (Idx == 1) - ImmVal = VRegAndVal->Value.getSExtValue(); - if (ImmVal != VRegAndVal->Value.getSExtValue()) + ImmVal = VRegAndVal->Value.getSExtValue(); + if (ImmVal != VRegAndVal->Value.getSExtValue()) return None; } @@ -1725,14 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; } else if (Ty == LLT::vector(2, 32)) { Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = ImmVal ? 
AArch64::SHLv4i16_shift : AArch64::USHLv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1749,10 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL( return true; } -bool AArch64InstructionSelector::selectVectorAshrLshr( +bool AArch64InstructionSelector::selectVectorAshrLshr( MachineInstr &I, MachineRegisterInfo &MRI) const { - assert(I.getOpcode() == TargetOpcode::G_ASHR || - I.getOpcode() == TargetOpcode::G_LSHR); + assert(I.getOpcode() == TargetOpcode::G_ASHR || + I.getOpcode() == TargetOpcode::G_LSHR); Register DstReg = I.getOperand(0).getReg(); const LLT Ty = MRI.getType(DstReg); Register Src1Reg = I.getOperand(1).getReg(); @@ -1761,40 +1761,40 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( if (!Ty.isVector()) return false; - bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; - - // We expect the immediate case to be lowered in the PostLegalCombiner to - // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. - + bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; + + // We expect the immediate case to be lowered in the PostLegalCombiner to + // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. + // There is not a shift right register instruction, but the shift left // register instruction takes a signed value, where negative numbers specify a // right shift. unsigned Opc = 0; unsigned NegOpc = 0; - const TargetRegisterClass *RC = - getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); + const TargetRegisterClass *RC = + getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); if (Ty == LLT::vector(2, 64)) { - Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; + Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; NegOpc = AArch64::NEGv2i64; } else if (Ty == LLT::vector(4, 32)) { - Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; + Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; NegOpc = AArch64::NEGv4i32; } else if (Ty == LLT::vector(2, 32)) { - Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; + Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; NegOpc = AArch64::NEGv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; - NegOpc = AArch64::NEGv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; - NegOpc = AArch64::NEGv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; - NegOpc = AArch64::NEGv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = IsASHR ? 
AArch64::SSHLv4i16 : AArch64::USHLv4i16; + NegOpc = AArch64::NEGv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; + NegOpc = AArch64::NEGv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; + NegOpc = AArch64::NEGv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; + NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; @@ -1931,40 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { MRI.setType(DstReg, LLT::scalar(64)); return true; } - case AArch64::G_DUP: { - // Convert the type from p0 to s64 to help selection. - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (!DstTy.getElementType().isPointer()) - return false; - MachineIRBuilder MIB(I); - auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); - MRI.setType(I.getOperand(0).getReg(), - DstTy.changeElementType(LLT::scalar(64))); - MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); - I.getOperand(1).setReg(NewSrc.getReg(0)); - return true; - } - case TargetOpcode::G_UITOFP: - case TargetOpcode::G_SITOFP: { - // If both source and destination regbanks are FPR, then convert the opcode - // to G_SITOF so that the importer can select it to an fpr variant. - // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank - // copy. - Register SrcReg = I.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) - return false; - - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { - if (I.getOpcode() == TargetOpcode::G_SITOFP) - I.setDesc(TII.get(AArch64::G_SITOF)); - else - I.setDesc(TII.get(AArch64::G_UITOF)); - return true; - } - return false; - } + case AArch64::G_DUP: { + // Convert the type from p0 to s64 to help selection. + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (!DstTy.getElementType().isPointer()) + return false; + MachineIRBuilder MIB(I); + auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); + MRI.setType(I.getOperand(0).getReg(), + DstTy.changeElementType(LLT::scalar(64))); + MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); + I.getOperand(1).setReg(NewSrc.getReg(0)); + return true; + } + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_SITOFP: { + // If both source and destination regbanks are FPR, then convert the opcode + // to G_SITOF so that the importer can select it to an fpr variant. + // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank + // copy. + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) + return false; + + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { + if (I.getOpcode() == TargetOpcode::G_SITOFP) + I.setDesc(TII.get(AArch64::G_SITOF)); + else + I.setDesc(TII.get(AArch64::G_UITOF)); + return true; + } + return false; + } default: return false; } @@ -2005,14 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd( LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); return false; } - - // Also take the opportunity here to try to do some optimization. 
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. - Register NegatedReg; - if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) - return true; - I.getOperand(2).setReg(NegatedReg); - I.setDesc(TII.get(TargetOpcode::G_SUB)); + + // Also take the opportunity here to try to do some optimization. + // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. + Register NegatedReg; + if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) + return true; + I.getOperand(2).setReg(NegatedReg); + I.setDesc(TII.get(TargetOpcode::G_SUB)); return true; } @@ -2102,17 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { - case TargetOpcode::G_BR: { - // If the branch jumps to the fallthrough block, don't bother emitting it. - // Only do this for -O0 for a good code size improvement, because when - // optimizations are enabled we want to leave this choice to - // MachineBlockPlacement. - bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; - if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) - return false; - I.eraseFromParent(); - return true; - } + case TargetOpcode::G_BR: { + // If the branch jumps to the fallthrough block, don't bother emitting it. + // Only do this for -O0 for a good code size improvement, because when + // optimizations are enabled we want to leave this choice to + // MachineBlockPlacement. + bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; + if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) + return false; + I.eraseFromParent(); + return true; + } case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { @@ -2232,8 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { MachineIRBuilder MIB(I); switch (Opcode) { - case TargetOpcode::G_BRCOND: - return selectCompareBranch(I, MF, MRI); + case TargetOpcode::G_BRCOND: + return selectCompareBranch(I, MF, MRI); case TargetOpcode::G_BRINDIRECT: { I.setDesc(TII.get(AArch64::BR)); @@ -2313,7 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - const LLT s128 = LLT::scalar(128); + const LLT s128 = LLT::scalar(128); const LLT p0 = LLT::pointer(0, 64); const Register DefReg = I.getOperand(0).getReg(); @@ -2323,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // FIXME: Redundant check, but even less readable when factored out. if (isFP) { - if (Ty != s32 && Ty != s64 && Ty != s128) { + if (Ty != s32 && Ty != s64 && Ty != s128) { LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 - << " or " << s128 << '\n'); + << " or " << s128 << '\n'); return false; } @@ -2339,9 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // The case when we have 0.0 is covered by tablegen. Reject it here so we // can be sure tablegen works correctly and isn't rescued by this code. - // 0.0 is not covered by tablegen for FP128. So we will handle this - // scenario in the code here. - if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) + // 0.0 is not covered by tablegen for FP128. So we will handle this + // scenario in the code here. 
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) return false; } else { // s32 and s64 are covered by tablegen. @@ -2368,17 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // Either emit a FMOV, or emit a copy to emit a normal mov. const TargetRegisterClass &GPRRC = DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; - const TargetRegisterClass &FPRRC = - DefSize == 32 ? AArch64::FPR32RegClass - : (DefSize == 64 ? AArch64::FPR64RegClass - : AArch64::FPR128RegClass); + const TargetRegisterClass &FPRRC = + DefSize == 32 ? AArch64::FPR32RegClass + : (DefSize == 64 ? AArch64::FPR64RegClass + : AArch64::FPR128RegClass); // Can we use a FMOV instruction to represent the immediate? if (emitFMovForFConstant(I, MRI)) return true; // For 64b values, emit a constant pool load instead. - if (DefSize == 64 || DefSize == 128) { + if (DefSize == 64 || DefSize == 128) { auto *FPImm = I.getOperand(1).getFPImm(); MachineIRBuilder MIB(I); auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); @@ -2571,21 +2571,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } auto &MemOp = **I.memoperands_begin(); - uint64_t MemSizeInBytes = MemOp.getSize(); + uint64_t MemSizeInBytes = MemOp.getSize(); if (MemOp.isAtomic()) { // For now we just support s8 acquire loads to be able to compile stack // protector code. if (MemOp.getOrdering() == AtomicOrdering::Acquire && - MemSizeInBytes == 1) { + MemSizeInBytes == 1) { I.setDesc(TII.get(AArch64::LDARB)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } - unsigned MemSizeInBits = MemSizeInBytes * 8; + unsigned MemSizeInBits = MemSizeInBytes * 8; -#ifndef NDEBUG +#ifndef NDEBUG const Register PtrReg = I.getOperand(1).getReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. @@ -2598,78 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const Register ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); - // Helper lambda for partially selecting I. Either returns the original - // instruction with an updated opcode, or a new instruction. - auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { - bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; - const unsigned NewOpc = - selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); - if (NewOpc == I.getOpcode()) - return nullptr; - // Check if we can fold anything into the addressing mode. - auto AddrModeFns = - selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); - if (!AddrModeFns) { - // Can't fold anything. Use the original instruction. - I.setDesc(TII.get(NewOpc)); - I.addOperand(MachineOperand::CreateImm(0)); - return &I; + // Helper lambda for partially selecting I. Either returns the original + // instruction with an updated opcode, or a new instruction. + auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { + bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; + const unsigned NewOpc = + selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); + if (NewOpc == I.getOpcode()) + return nullptr; + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = + selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + if (!AddrModeFns) { + // Can't fold anything. Use the original instruction. 
+ I.setDesc(TII.get(NewOpc)); + I.addOperand(MachineOperand::CreateImm(0)); + return &I; } - // Folded something. Create a new instruction and return it. - auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); - IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); - NewInst.cloneMemRefs(I); - for (auto &Fn : *AddrModeFns) - Fn(NewInst); - I.eraseFromParent(); - return &*NewInst; - }; + // Folded something. Create a new instruction and return it. + auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); + IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); + NewInst.cloneMemRefs(I); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + I.eraseFromParent(); + return &*NewInst; + }; - MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); - if (!LoadStore) - return false; + MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); + if (!LoadStore) + return false; // If we're storing a 0, use WZR/XZR. - if (Opcode == TargetOpcode::G_STORE) { - auto CVal = getConstantVRegValWithLookThrough( - LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, - /*HandleFConstants = */ false); - if (CVal && CVal->Value == 0) { - switch (LoadStore->getOpcode()) { - case AArch64::STRWui: - case AArch64::STRHHui: - case AArch64::STRBBui: - LoadStore->getOperand(0).setReg(AArch64::WZR); - break; - case AArch64::STRXui: - LoadStore->getOperand(0).setReg(AArch64::XZR); - break; - } + if (Opcode == TargetOpcode::G_STORE) { + auto CVal = getConstantVRegValWithLookThrough( + LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, + /*HandleFConstants = */ false); + if (CVal && CVal->Value == 0) { + switch (LoadStore->getOpcode()) { + case AArch64::STRWui: + case AArch64::STRHHui: + case AArch64::STRBBui: + LoadStore->getOperand(0).setReg(AArch64::WZR); + break; + case AArch64::STRXui: + LoadStore->getOperand(0).setReg(AArch64::XZR); + break; + } } } if (IsZExtLoad) { - // The zextload from a smaller type to i32 should be handled by the - // importer. - if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) + // The zextload from a smaller type to i32 should be handled by the + // importer. + if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; // If we have a ZEXTLOAD then change the load's type to be a narrower reg - // and zero_extend with SUBREG_TO_REG. + // and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - Register DstReg = LoadStore->getOperand(0).getReg(); - LoadStore->getOperand(0).setReg(LdReg); + Register DstReg = LoadStore->getOperand(0).getReg(); + LoadStore->getOperand(0).setReg(LdReg); - MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); + MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) .addImm(0) .addUse(LdReg) .addImm(AArch64::sub_32); - constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, MRI); } - return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); } case TargetOpcode::G_SMULH: @@ -2700,21 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // operands to use appropriate classes. 
return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - case TargetOpcode::G_LSHR: + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: if (MRI.getType(I.getOperand(0).getReg()).isVector()) - return selectVectorAshrLshr(I, MRI); + return selectVectorAshrLshr(I, MRI); LLVM_FALLTHROUGH; case TargetOpcode::G_SHL: if (Opcode == TargetOpcode::G_SHL && MRI.getType(I.getOperand(0).getReg()).isVector()) return selectVectorSHL(I, MRI); LLVM_FALLTHROUGH; - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_OR: { + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_OR: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; @@ -2743,24 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { I.eraseFromParent(); return true; } - case TargetOpcode::G_SADDO: - case TargetOpcode::G_UADDO: - case TargetOpcode::G_SSUBO: - case TargetOpcode::G_USUBO: { - // Emit the operation and get the correct condition code. + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_USUBO: { + // Emit the operation and get the correct condition code. MachineIRBuilder MIRBuilder(I); - auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), - I.getOperand(2), I.getOperand(3), MIRBuilder); + auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), + I.getOperand(2), I.getOperand(3), MIRBuilder); // Now, put the overflow result in the register given by the first operand - // to the overflow op. CSINC increments the result when the predicate is - // false, so to get the increment when it's true, we need to use the - // inverse. In this case, we want to increment when carry is set. - Register ZReg = AArch64::WZR; + // to the overflow op. CSINC increments the result when the predicate is + // false, so to get the increment when it's true, we need to use the + // inverse. In this case, we want to increment when carry is set. + Register ZReg = AArch64::WZR; auto CsetMI = MIRBuilder .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {ZReg, ZReg}) - .addImm(getInvertedCondCode(OpAndCC.second)); + {ZReg, ZReg}) + .addImm(getInvertedCondCode(OpAndCC.second)); constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); I.eraseFromParent(); return true; @@ -2768,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_PTRMASK: { Register MaskReg = I.getOperand(2).getReg(); - Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); + Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); // TODO: Implement arbitrary cases if (!MaskVal || !isShiftedMask_64(*MaskVal)) return false; @@ -3059,15 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (tryOptSelect(I)) return true; - // Make sure to use an unused vreg instead of wzr, so that the peephole - // optimizations will be able to optimize these. 
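An aside on the G_PTRMASK case in the hunk above: the selector only proceeds when the mask constant is a single contiguous run of set bits and otherwise returns false, per the TODO. A minimal standalone sketch of that test follows; the helpers are illustrative stand-ins for llvm::isShiftedMask_64 / llvm::isMask_64, not the LLVM functions themselves.

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the shifted-mask test that gates the
// G_PTRMASK case above: accept only values whose set bits form one
// contiguous run, possibly shifted left (e.g. 0x0ff0 but not 0x0f0f).
static bool isMask64(uint64_t V) {        // 0...01...1
  return V && ((V + 1) & V) == 0;
}
static bool isShiftedMask64(uint64_t V) { // 0...01...10...0
  return V && isMask64((V - 1) | V);
}

int main() {
  assert(isShiftedMask64(0x0ff0));                 // one shifted run of ones
  assert(isShiftedMask64(0xff00000000000000ULL));  // run at the top of the word
  assert(!isShiftedMask64(0x0f0f));                // two separate runs
  assert(!isShiftedMask64(0));                     // zero is rejected
  return 0;
}

Masks that fail this test are simply left unselected here, as the TODO in the hunk notes.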
- MachineIRBuilder MIB(I); - Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) - .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) - return false; + // Make sure to use an unused vreg instead of wzr, so that the peephole + // optimizations will be able to optimize these. + MachineIRBuilder MIB(I); + Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) + return false; I.eraseFromParent(); return true; } @@ -3082,21 +3082,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } MachineIRBuilder MIRBuilder(I); - auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIRBuilder); + auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), + MIRBuilder); emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); I.eraseFromParent(); return true; } case TargetOpcode::G_FCMP: { - MachineIRBuilder MIRBuilder(I); - CmpInst::Predicate Pred = - static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), - MIRBuilder, Pred) || - !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) + MachineIRBuilder MIRBuilder(I); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), + MIRBuilder, Pred) || + !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) return false; I.eraseFromParent(); return true; @@ -3136,24 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); } } - case AArch64::G_DUP: { - // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by - // imported patterns. Do it manually here. Avoiding generating s16 gpr is - // difficult because at RBS we may end up pessimizing the fpr case if we - // decided to add an anyextend to fix this. Manual selection is the most - // robust solution for now. - Register SrcReg = I.getOperand(1).getReg(); - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) - return false; // We expect the fpr regbank case to be imported. - LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.getSizeInBits() == 16) - I.setDesc(TII.get(AArch64::DUPv8i16gpr)); - else if (SrcTy.getSizeInBits() == 8) - I.setDesc(TII.get(AArch64::DUPv16i8gpr)); - else - return false; - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } + case AArch64::G_DUP: { + // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by + // imported patterns. Do it manually here. Avoiding generating s16 gpr is + // difficult because at RBS we may end up pessimizing the fpr case if we + // decided to add an anyextend to fix this. Manual selection is the most + // robust solution for now. 
+ Register SrcReg = I.getOperand(1).getReg(); + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) + return false; // We expect the fpr regbank case to be imported. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.getSizeInBits() == 16) + I.setDesc(TII.get(AArch64::DUPv8i16gpr)); + else if (SrcTy.getSizeInBits() == 8) + I.setDesc(TII.get(AArch64::DUPv16i8gpr)); + else + return false; + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } case TargetOpcode::G_INTRINSIC_TRUNC: return selectIntrinsicTrunc(I, MRI); case TargetOpcode::G_INTRINSIC_ROUND: @@ -3174,52 +3174,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectConcatVectors(I, MRI); case TargetOpcode::G_JUMP_TABLE: return selectJumpTable(I, MRI); - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_ADD: - return selectReduction(I, MRI); - } - - return false; -} - -bool AArch64InstructionSelector::selectReduction( - MachineInstr &I, MachineRegisterInfo &MRI) const { - Register VecReg = I.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(16, 8)) - Opc = AArch64::ADDVv16i8v; - else if (VecTy == LLT::vector(8, 16)) - Opc = AArch64::ADDVv8i16v; - else if (VecTy == LLT::vector(4, 32)) - Opc = AArch64::ADDVv4i32v; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::ADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_ADD: + return selectReduction(I, MRI); } - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(2, 32)) - Opc = AArch64::FADDPv2i32p; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::FADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } return false; } +bool AArch64InstructionSelector::selectReduction( + MachineInstr &I, MachineRegisterInfo &MRI) const { + Register VecReg = I.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(16, 8)) + Opc = AArch64::ADDVv16i8v; + else if (VecTy == LLT::vector(8, 16)) + Opc = AArch64::ADDVv8i16v; + else if (VecTy == LLT::vector(4, 32)) + Opc = AArch64::ADDVv4i32v; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::ADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(2, 32)) + Opc = AArch64::FADDPv2i32p; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::FADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + return false; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -3230,8 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, Register TargetReg = 
MRI.createVirtualRegister(&AArch64::GPR64RegClass); Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - - MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); + + MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg}, {JTAddr, Index}) .addJumpTableIndex(JTI); @@ -3268,20 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue( const GlobalValue &GV = *I.getOperand(1).getGlobal(); MachineIRBuilder MIB(I); - auto LoadGOT = - MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) - .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); + auto LoadGOT = + MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) + .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, - {LoadGOT.getReg(0)}) + {LoadGOT.getReg(0)}) .addImm(0); - MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); + MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) - .addUse(AArch64::X0, RegState::Implicit) + .addUse(AArch64::X0, RegState::Implicit) .addDef(AArch64::X0, RegState::Implicit) .addRegMask(TRI.getTLSCallPreservedMask()); @@ -3767,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt( (void)WideTy; assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && "source register size too small!"); - assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); + assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); // Need the lane index to determine the correct copy opcode. 
MachineOperand &LaneIdxOp = I.getOperand(2); @@ -3782,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt( auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); MachineIRBuilder MIRBuilder(I); @@ -4005,10 +4005,10 @@ static std::pair<unsigned, unsigned> getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { unsigned Opc, SubregIdx; if (RB.getID() == AArch64::GPRRegBankID) { - if (EltSize == 16) { - Opc = AArch64::INSvi16gpr; - SubregIdx = AArch64::ssub; - } else if (EltSize == 32) { + if (EltSize == 16) { + Opc = AArch64::INSvi16gpr; + SubregIdx = AArch64::ssub; + } else if (EltSize == 32) { Opc = AArch64::INSvi32gpr; SubregIdx = AArch64::ssub; } else if (EltSize == 64) { @@ -4037,93 +4037,93 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { return std::make_pair(Opc, SubregIdx); } -MachineInstr *AArch64InstructionSelector::emitInstr( - unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns) const { - assert(Opcode && "Expected an opcode?"); - assert(!isPreISelGenericOpcode(Opcode) && - "Function should only be used to produce selected instructions!"); - auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); - if (RenderFns) - for (auto &Fn : *RenderFns) - Fn(MI); - constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); - return &*MI; -} - -MachineInstr *AArch64InstructionSelector::emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); - auto Ty = MRI.getType(LHS.getReg()); - assert(!Ty.isVector() && "Expected a scalar or pointer?"); - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); - bool Is32Bit = Size == 32; - - // INSTRri form with positive arithmetic immediate. - if (auto Fns = selectArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRri form with negative arithmetic immediate. - if (auto Fns = selectNegArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrx form. - if (auto Fns = selectArithExtendedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrs form. 
- if (auto Fns = selectShiftedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, - MIRBuilder); -} - +MachineInstr *AArch64InstructionSelector::emitInstr( + unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns) const { + assert(Opcode && "Expected an opcode?"); + assert(!isPreISelGenericOpcode(Opcode) && + "Function should only be used to produce selected instructions!"); + auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); + if (RenderFns) + for (auto &Fn : *RenderFns) + Fn(MI); + constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + return &*MI; +} + +MachineInstr *AArch64InstructionSelector::emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + auto Ty = MRI.getType(LHS.getReg()); + assert(!Ty.isVector() && "Expected a scalar or pointer?"); + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); + bool Is32Bit = Size == 32; + + // INSTRri form with positive arithmetic immediate. + if (auto Fns = selectArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRri form with negative arithmetic immediate. + if (auto Fns = selectNegArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrx form. + if (auto Fns = selectArithExtendedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrs form. 
+ if (auto Fns = selectShiftedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, + MIRBuilder); +} + MachineInstr * AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDXri, AArch64::ADDWri}, - {AArch64::ADDXrs, AArch64::ADDWrs}, - {AArch64::ADDXrr, AArch64::ADDWrr}, - {AArch64::SUBXri, AArch64::SUBWri}, - {AArch64::ADDXrx, AArch64::ADDWrx}}}; - return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::ADDSXrs, AArch64::ADDSWrs}, - {AArch64::ADDSXrr, AArch64::ADDSWrr}, - {AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::SUBSXrs, AArch64::SUBSWrs}, - {AArch64::SUBSXrr, AArch64::SUBSWrr}, - {AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDXri, AArch64::ADDWri}, + {AArch64::ADDXrs, AArch64::ADDWrs}, + {AArch64::ADDXrr, AArch64::ADDWrr}, + {AArch64::SUBXri, AArch64::SUBWri}, + {AArch64::ADDXrx, AArch64::ADDWrx}}}; + return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::ADDSXrs, AArch64::ADDSWrs}, + {AArch64::ADDSXrr, AArch64::ADDSWrr}, + {AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::SUBSXrs, AArch64::SUBSWrs}, + {AArch64::SUBSXrr, AArch64::SUBSWrr}, + {AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); } MachineInstr * @@ -4131,129 +4131,129 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); - auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; - return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); + auto RC = Is32Bit ? 
&AArch64::GPR32RegClass : &AArch64::GPR64RegClass; + return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); } MachineInstr * -AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, +AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT Ty = MRI.getType(LHS.getReg()); - unsigned RegSize = Ty.getSizeInBits(); + LLT Ty = MRI.getType(LHS.getReg()); + unsigned RegSize = Ty.getSizeInBits(); bool Is32Bit = (RegSize == 32); - const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, - {AArch64::ANDSXrs, AArch64::ANDSWrs}, - {AArch64::ANDSXrr, AArch64::ANDSWrr}}; - // ANDS needs a logical immediate for its immediate form. Check if we can - // fold one in. - if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { - int64_t Imm = ValAndVReg->Value.getSExtValue(); - - if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { - auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); - TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - return &*TstMI; - } - } - - if (auto Fns = selectLogicalShiftedRegister(RHS)) - return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); - return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); + const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, + {AArch64::ANDSXrs, AArch64::ANDSWrs}, + {AArch64::ANDSXrr, AArch64::ANDSWrr}}; + // ANDS needs a logical immediate for its immediate form. Check if we can + // fold one in. + if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { + int64_t Imm = ValAndVReg->Value.getSExtValue(); + + if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { + auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); + TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + return &*TstMI; + } + } + + if (auto Fns = selectLogicalShiftedRegister(RHS)) + return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); + return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); } -MachineInstr *AArch64InstructionSelector::emitIntegerCompare( +MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); assert(Predicate.isPredicate() && "Expected predicate?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT CmpTy = MRI.getType(LHS.getReg()); - assert(!CmpTy.isVector() && "Expected scalar or pointer"); - unsigned Size = CmpTy.getSizeInBits(); - (void)Size; - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); - // Fold the compare into a cmn or tst if possible. 
- if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) - return FoldCmp; - auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); - return emitSUBS(Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( - Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); -#ifndef NDEBUG - LLT Ty = MRI.getType(Dst); - assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && - "Expected a 32-bit scalar register?"); -#endif - const Register ZeroReg = AArch64::WZR; - auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { - auto CSet = - MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) - .addImm(getInvertedCondCode(CC)); - constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); - return &*CSet; - }; - - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(Pred, CC1, CC2); - if (CC2 == AArch64CC::AL) - return EmitCSet(Dst, CC1); - - const TargetRegisterClass *RC = &AArch64::GPR32RegClass; - Register Def1Reg = MRI.createVirtualRegister(RC); - Register Def2Reg = MRI.createVirtualRegister(RC); - EmitCSet(Def1Reg, CC1); - EmitCSet(Def2Reg, CC2); - auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); - constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); - return &*OrMI; -} - -MachineInstr * -AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> Pred) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - LLT Ty = MRI.getType(LHS); - if (Ty.isVector()) - return nullptr; - unsigned OpSize = Ty.getSizeInBits(); - if (OpSize != 32 && OpSize != 64) - return nullptr; - - // If this is a compare against +0.0, then we don't have - // to explicitly materialize a constant. - const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); - bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); - - auto IsEqualityPred = [](CmpInst::Predicate P) { - return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || - P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; - }; - if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { - // Try commutating the operands. - const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); - if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { - ShouldUseImm = true; - std::swap(LHS, RHS); - } - } - unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, - {AArch64::FCMPSri, AArch64::FCMPDri}}; - unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; - - // Partially build the compare. Decide if we need to add a use for the - // third operand based off whether or not we're comparing against 0.0. - auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); - if (!ShouldUseImm) - CmpMI.addUse(RHS); + LLT CmpTy = MRI.getType(LHS.getReg()); + assert(!CmpTy.isVector() && "Expected scalar or pointer"); + unsigned Size = CmpTy.getSizeInBits(); + (void)Size; + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); + // Fold the compare into a cmn or tst if possible. 
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) + return FoldCmp; + auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); + return emitSUBS(Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( + Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); +#ifndef NDEBUG + LLT Ty = MRI.getType(Dst); + assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && + "Expected a 32-bit scalar register?"); +#endif + const Register ZeroReg = AArch64::WZR; + auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { + auto CSet = + MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) + .addImm(getInvertedCondCode(CC)); + constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); + return &*CSet; + }; + + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(Pred, CC1, CC2); + if (CC2 == AArch64CC::AL) + return EmitCSet(Dst, CC1); + + const TargetRegisterClass *RC = &AArch64::GPR32RegClass; + Register Def1Reg = MRI.createVirtualRegister(RC); + Register Def2Reg = MRI.createVirtualRegister(RC); + EmitCSet(Def1Reg, CC1); + EmitCSet(Def2Reg, CC2); + auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); + constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); + return &*OrMI; +} + +MachineInstr * +AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> Pred) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + LLT Ty = MRI.getType(LHS); + if (Ty.isVector()) + return nullptr; + unsigned OpSize = Ty.getSizeInBits(); + if (OpSize != 32 && OpSize != 64) + return nullptr; + + // If this is a compare against +0.0, then we don't have + // to explicitly materialize a constant. + const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); + bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); + + auto IsEqualityPred = [](CmpInst::Predicate P) { + return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || + P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; + }; + if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { + // Try commutating the operands. + const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); + if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { + ShouldUseImm = true; + std::swap(LHS, RHS); + } + } + unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, + {AArch64::FCMPSri, AArch64::FCMPDri}}; + unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; + + // Partially build the compare. Decide if we need to add a use for the + // third operand based off whether or not we're comparing against 0.0. 
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); + if (!ShouldUseImm) + CmpMI.addUse(RHS); constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); - return &*CmpMI; + return &*CmpMI; } MachineInstr *AArch64InstructionSelector::emitVectorConcat( @@ -4363,25 +4363,25 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, return &*I; } -std::pair<MachineInstr *, AArch64CC::CondCode> -AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, - MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode!"); - case TargetOpcode::G_SADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_UADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); - case TargetOpcode::G_SSUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_USUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); - } -} - +std::pair<MachineInstr *, AArch64CC::CondCode> +AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, + MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_UADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); + case TargetOpcode::G_SSUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_USUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); + } +} + bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { MachineIRBuilder MIB(I); MachineRegisterInfo &MRI = *MIB.getMRI(); @@ -4441,17 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { AArch64CC::CondCode CondCode; if (CondOpc == TargetOpcode::G_ICMP) { - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); CondCode = changeICMPPredToAArch64CC(Pred); - emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), - CondDef->getOperand(1), MIB); + emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), + CondDef->getOperand(1), MIB); } else { // Get the condition code for the select. - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); AArch64CC::CondCode CondCode2; - changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); + changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two // instructions to emit the comparison. @@ -4460,16 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { if (CondCode2 != AArch64CC::AL) return false; - if (!emitFPCompare(CondDef->getOperand(2).getReg(), - CondDef->getOperand(3).getReg(), MIB)) { - LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); + if (!emitFPCompare(CondDef->getOperand(2).getReg(), + CondDef->getOperand(3).getReg(), MIB)) { + LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; - } + } } // Emit the select. 
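A side note on emitOverflowOp and the CSINC-based cset in the hunks above: with both sources tied to WZR, CSINC yields (cond ? 0 : 1), so materializing "1 when cc holds" uses the inverted condition code, and AArch64 condition codes invert by flipping their low bit. The sketch below restates that relationship with enum values mirroring the architectural encoding; it is illustrative only, not the selector's own code.

#include <cassert>

// Illustrative restatement of the cset-via-CSINC trick used for the
// G_[SU]ADDO / G_[SU]SUBO cases above: CSINC Wd, WZR, WZR, cond gives
// (cond ? 0 : 1), so "1 when cc holds" is emitted with the *inverted*
// code, and inversion is a flip of the encoding's low bit.
enum CondCode { EQ = 0, NE = 1, HS = 2, LO = 3, MI = 4, PL = 5,
                VS = 6, VC = 7, HI = 8, LS = 9, GE = 10, LT = 11,
                GT = 12, LE = 13, AL = 14 };

constexpr CondCode invert(CondCode CC) { return CondCode(CC ^ 0x1); }

int main() {
  // Codes picked in emitOverflowOp: signed overflow -> VS, unsigned add
  // overflow (carry set) -> HS, unsigned sub borrow (carry clear) -> LO.
  assert(invert(VS) == VC);
  assert(invert(HS) == LO);
  assert(invert(GE) == LT);
  return 0;
}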
- emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), - I.getOperand(3).getReg(), CondCode, MIB); + emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), + I.getOperand(3).getReg(), CondCode, MIB); I.eraseFromParent(); return true; } @@ -4552,15 +4552,15 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( // Produce this if the compare is signed: // // tst x, y - if (!CmpInst::isUnsigned(P) && LHSDef && + if (!CmpInst::isUnsigned(P) && LHSDef && LHSDef->getOpcode() == TargetOpcode::G_AND) { // Make sure that the RHS is 0. auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); if (!ValAndVReg || ValAndVReg->Value != 0) return nullptr; - return emitTST(LHSDef->getOperand(1), - LHSDef->getOperand(2), MIRBuilder); + return emitTST(LHSDef->getOperand(1), + LHSDef->getOperand(2), MIRBuilder); } return nullptr; @@ -4708,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt( auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); // Perform the lane insert. Register SrcReg = I.getOperand(1).getReg(); @@ -4765,9 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt( bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - unsigned DstSize = DstTy.getSizeInBits(); - assert(DstSize <= 128 && "Unexpected build_vec type!"); - if (DstSize < 32) + unsigned DstSize = DstTy.getSizeInBits(); + assert(DstSize <= 128 && "Unexpected build_vec type!"); + if (DstSize < 32) return false; // Check if we're building a constant vector, in which case we want to // generate a constant pool load instead of a vector insert sequence. @@ -4788,24 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec( } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); - if (CV->isNullValue()) { - // Until the importer can support immAllZerosV in pattern leaf nodes, - // select a zero move manually here. - Register DstReg = I.getOperand(0).getReg(); - if (DstSize == 128) { - auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); - } else if (DstSize == 64) { - auto Mov = - MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) - .addImm(0); - MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) - .addReg(Mov.getReg(0), 0, AArch64::dsub); - I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); - } - } + if (CV->isNullValue()) { + // Until the importer can support immAllZerosV in pattern leaf nodes, + // select a zero move manually here. 
+ Register DstReg = I.getOperand(0).getReg(); + if (DstSize == 128) { + auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + } else if (DstSize == 64) { + auto Mov = + MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); + } + } auto *CPLoad = emitLoadFromConstantPool(CV, MIB); if (!CPLoad) { LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); @@ -4927,10 +4927,10 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( case Intrinsic::debugtrap: MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); break; - case Intrinsic::ubsantrap: - MIRBuilder.buildInstr(AArch64::BRK, {}, {}) - .addImm(I.getOperand(1).getImm() | ('U' << 8)); - break; + case Intrinsic::ubsantrap: + MIRBuilder.buildInstr(AArch64::BRK, {}, {}) + .addImm(I.getOperand(1).getImm() | ('U' << 8)); + break; } I.eraseFromParent(); @@ -4996,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { - if (!MFReturnAddr) { - // Insert the copy from LR/X30 into the entry block, before it can be - // clobbered by anything. - MFI.setReturnAddressIsTaken(true); - MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, - AArch64::GPR64RegClass); - } - - if (STI.hasPAuth()) { - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); - } else { - MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + if (!MFReturnAddr) { + // Insert the copy from LR/X30 into the entry block, before it can be + // clobbered by anything. 
+ MFI.setReturnAddressIsTaken(true); + MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, + AArch64::GPR64RegClass); } - + + if (STI.hasPAuth()) { + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); + } else { + MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } + I.eraseFromParent(); return true; } @@ -5031,16 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, MIRBuilder.buildCopy({DstReg}, {FrameAddr}); else { MFI.setReturnAddressIsTaken(true); - - if (STI.hasPAuth()) { - Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); - } else { - MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); - } + + if (STI.hasPAuth()) { + Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); + } else { + MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } } I.eraseFromParent(); @@ -5248,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL( // The value must fit into 3 bits, and must be positive. Make sure that is // true. - int64_t ImmVal = ValAndVReg->Value.getSExtValue(); + int64_t ImmVal = ValAndVReg->Value.getSExtValue(); // Since we're going to pull this into a shift, the constant value must be // a power of 2. If we got a multiply, then we need to check this. @@ -5388,60 +5388,60 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const { MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); - if (!Root.isReg()) - return None; - MachineInstr *PtrAdd = - getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); - if (!PtrAdd) + if (!Root.isReg()) return None; - - // Check for an immediates which cannot be encoded in the [base + imm] - // addressing mode, and can't be encoded in an add/sub. If this happens, we'll - // end up with code like: - // - // mov x0, wide - // add x1 base, x0 - // ldr x2, [x1, x0] - // - // In this situation, we can use the [base, xreg] addressing mode to save an - // add/sub: - // - // mov x0, wide - // ldr x2, [base, x0] - auto ValAndVReg = - getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); - if (ValAndVReg) { - unsigned Scale = Log2_32(SizeInBytes); - int64_t ImmOff = ValAndVReg->Value.getSExtValue(); - - // Skip immediates that can be selected in the load/store addresing - // mode. - if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && - ImmOff < (0x1000 << Scale)) - return None; - - // Helper lambda to decide whether or not it is preferable to emit an add. - auto isPreferredADD = [](int64_t ImmOff) { - // Constants in [0x0, 0xfff] can be encoded in an add. - if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) - return true; - - // Can it be encoded in an add lsl #12? - if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) - return false; - - // It can be encoded in an add lsl #12, but we may not want to. 
If it is - // possible to select this as a single movz, then prefer that. A single - // movz is faster than an add with a shift. - return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && - (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; - }; - - // If the immediate can be encoded in a single add/sub, then bail out. - if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) - return None; - } - + MachineInstr *PtrAdd = + getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); + if (!PtrAdd) + return None; + + // Check for an immediates which cannot be encoded in the [base + imm] + // addressing mode, and can't be encoded in an add/sub. If this happens, we'll + // end up with code like: + // + // mov x0, wide + // add x1 base, x0 + // ldr x2, [x1, x0] + // + // In this situation, we can use the [base, xreg] addressing mode to save an + // add/sub: + // + // mov x0, wide + // ldr x2, [base, x0] + auto ValAndVReg = + getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); + if (ValAndVReg) { + unsigned Scale = Log2_32(SizeInBytes); + int64_t ImmOff = ValAndVReg->Value.getSExtValue(); + + // Skip immediates that can be selected in the load/store addresing + // mode. + if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && + ImmOff < (0x1000 << Scale)) + return None; + + // Helper lambda to decide whether or not it is preferable to emit an add. + auto isPreferredADD = [](int64_t ImmOff) { + // Constants in [0x0, 0xfff] can be encoded in an add. + if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) + return true; + + // Can it be encoded in an add lsl #12? + if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) + return false; + + // It can be encoded in an add lsl #12, but we may not want to. If it is + // possible to select this as a single movz, then prefer that. A single + // movz is faster than an add with a shift. + return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && + (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; + }; + + // If the immediate can be encoded in a single add/sub, then bail out. + if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) + return None; + } + // Try to fold shifts into the addressing mode. 
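To make the isPreferredADD heuristic above easier to check in isolation, here is a standalone restatement with a few sample offsets; it mirrors the lambda quoted in the hunk rather than calling into the selector.

#include <cassert>
#include <cstdint>

// Standalone restatement of the isPreferredADD lambda above: prefer
// materializing the offset with add/sub when it fits a 12-bit immediate
// (optionally shifted left by 12), unless the constant is also a single
// movz, in which case the [base, Xm] register form is kept instead.
static bool isPreferredADD(int64_t ImmOff) {
  // Fits a plain 12-bit add immediate.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Not representable as "add ..., #imm, lsl #12" at all.
  if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
    return false;
  // Representable with lsl #12, but only preferred when the constant is
  // not already a single movz (with or without a 16-bit shift).
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
}

int main() {
  assert(isPreferredADD(0x123));      // plain add immediate
  assert(!isPreferredADD(0x5000));    // a single movz covers this
  assert(!isPreferredADD(0x240000));  // movz #0x24, lsl #16 covers this
  assert(isPreferredADD(0x123000));   // needs movz+movk otherwise; add lsl #12 wins
  return 0;
}

As in the hunk, the caller bails out of the [base, Xm] addressing mode when either ImmOff or -ImmOff passes this test.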
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); if (AddrModeFns) @@ -5871,8 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); - Optional<int64_t> CstVal = - getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); + Optional<int64_t> CstVal = + getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5a6c904e3f..af24267bf2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -14,7 +14,7 @@ #include "AArch64LegalizerInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" @@ -23,8 +23,8 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include <initializer_list> -#include "llvm/Support/MathExtras.h" +#include <initializer_list> +#include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "aarch64-legalinfo" @@ -56,13 +56,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); - std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ - v16s8, v8s16, v4s32, - v2s64, v2p0, - /* End 128bit types */ - /* Begin 64bit types */ - v8s8, v4s16, v2s32}; - + std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ + v16s8, v8s16, v4s32, + v2s64, v2p0, + /* End 128bit types */ + /* Begin 64bit types */ + v8s8, v4s16, v2s32}; + const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); // FIXME: support subtargets which have neon/fp-armv8 disabled. @@ -71,31 +71,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return; } - // Some instructions only support s16 if the subtarget has full 16-bit FP - // support. - const bool HasFP16 = ST.hasFullFP16(); - const LLT &MinFPScalar = HasFP16 ? s16 : s32; - + // Some instructions only support s16 if the subtarget has full 16-bit FP + // support. + const bool HasFP16 = ST.hasFullFP16(); + const LLT &MinFPScalar = HasFP16 ? 
s16 : s32; + getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalFor({p0, s1, s8, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) - .clampScalar(0, s1, s64) - .widenScalarToNextPow2(0, 8) - .fewerElementsIf( - [=](const LegalityQuery &Query) { - return Query.Types[0].isVector() && - (Query.Types[0].getElementType() != s64 || - Query.Types[0].getNumElements() != 2); - }, - [=](const LegalityQuery &Query) { - LLT EltTy = Query.Types[0].getElementType(); - if (EltTy == s64) - return std::make_pair(0, LLT::vector(2, 64)); - return std::make_pair(0, EltTy); - }); - - getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) + .legalFor({p0, s1, s8, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) + .clampScalar(0, s1, s64) + .widenScalarToNextPow2(0, 8) + .fewerElementsIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].isVector() && + (Query.Types[0].getElementType() != s64 || + Query.Types[0].getNumElements() != 2); + }, + [=](const LegalityQuery &Query) { + LLT EltTy = Query.Types[0].getElementType(); + if (EltTy == s64) + return std::make_pair(0, LLT::vector(2, 64)); + return std::make_pair(0, EltTy); + }); + + getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) .clampScalar(0, s16, s64) .widenScalarToNextPow2(0); @@ -105,38 +105,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) - .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) - .scalarizeIf( - [=](const LegalityQuery &Query) { - return Query.Opcode == G_MUL && Query.Types[0] == v2s64; - }, - 0) - .legalFor({v2s64}) + .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) + .scalarizeIf( + [=](const LegalityQuery &Query) { + return Query.Opcode == G_MUL && Query.Types[0] == v2s64; + }, + 0) + .legalFor({v2s64}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) .clampNumElements(0, v2s32, v4s32) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) + getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) .customIf([=](const LegalityQuery &Query) { const auto &SrcTy = Query.Types[0]; const auto &AmtTy = Query.Types[1]; return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && AmtTy.getSizeInBits() == 32; }) - .legalFor({ - {s32, s32}, - {s32, s64}, - {s64, s64}, - {v8s8, v8s8}, - {v16s8, v16s8}, - {v4s16, v4s16}, - {v8s16, v8s16}, - {v2s32, v2s32}, - {v4s32, v4s32}, - {v2s64, v2s64}, - }) + .legalFor({ + {s32, s32}, + {s32, s64}, + {s64, s64}, + {v8s8, v8s8}, + {v16s8, v16s8}, + {v4s16, v4s16}, + {v8s16, v8s16}, + {v2s32, v2s32}, + {v4s32, v4s32}, + {v2s64, v2s64}, + }) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) @@ -161,25 +161,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SREM, G_UREM}) .lowerFor({s1, s8, s16, s32, s64}); - getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); + getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); - getActionDefinitionsBuilder( - {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) + getActionDefinitionsBuilder( + {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) .legalFor({{s32, s1}, {s64, s1}}) .minScalar(0, s32); getActionDefinitionsBuilder({G_FADD, G_FSUB, 
G_FMUL, G_FDIV, G_FNEG}) - .legalFor({s32, s64, v2s64, v4s32, v2s32}) - .clampNumElements(0, v2s32, v4s32) - .clampNumElements(0, v2s64, v2s64); + .legalFor({s32, s64, v2s64, v4s32, v2s32}) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64); getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, - G_FNEARBYINT, G_INTRINSIC_LRINT}) + G_FNEARBYINT, G_INTRINSIC_LRINT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( @@ -285,7 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {v4s32, p0, 128, 8}, {v2s64, p0, 128, 8}}) // These extends are also legal - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) + .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) .clampScalar(0, s8, s64) .lowerIfMemSizeNotPow2() // Lower any any-extending loads left into G_ANYEXT and G_LOAD @@ -307,7 +307,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {p0, p0, 64, 8}, {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, - {v8s8, p0, 64, 8}, + {v8s8, p0, 64, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, {v2s32, p0, 64, 8}, @@ -325,19 +325,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // Constants getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({p0, s8, s16, s32, s64}) + .legalFor({p0, s8, s16, s32, s64}) .clampScalar(0, s8, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder(G_FCONSTANT) - .legalIf([=](const LegalityQuery &Query) { - const auto &Ty = Query.Types[0]; - if (HasFP16 && Ty == s16) - return true; - return Ty == s32 || Ty == s64 || Ty == s128; - }) - .clampScalar(0, MinFPScalar, s128); - - getActionDefinitionsBuilder({G_ICMP, G_FCMP}) + .legalIf([=](const LegalityQuery &Query) { + const auto &Ty = Query.Types[0]; + if (HasFP16 && Ty == s16) + return true; + return Ty == s32 || Ty == s64 || Ty == s128; + }) + .clampScalar(0, MinFPScalar, s128); + + getActionDefinitionsBuilder({G_ICMP, G_FCMP}) .legalFor({{s32, s32}, {s32, s64}, {s32, p0}, @@ -365,8 +365,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarOrEltIf( [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, s64) - .widenScalarOrEltToNextPow2(1) - .clampNumElements(0, v2s32, v4s32); + .widenScalarOrEltToNextPow2(1) + .clampNumElements(0, v2s32, v4s32); // Extensions auto ExtLegalFunc = [=](const LegalityQuery &Query) { @@ -374,7 +374,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) if (DstSize == 128 && !Query.Types[0].isVector()) return false; // Extending to a scalar s128 needs narrowing. - + // Make sure that we have something that will fit in a register, and // make sure it's a power of 2. if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) @@ -399,28 +399,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf(ExtLegalFunc) .clampScalar(0, s64, s64); // Just for s128, others are handled above. 
- getActionDefinitionsBuilder(G_TRUNC) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, - 0, s8) - .customIf([=](const LegalityQuery &Query) { - LLT DstTy = Query.Types[0]; - LLT SrcTy = Query.Types[1]; - return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; - }) - .alwaysLegal(); + getActionDefinitionsBuilder(G_TRUNC) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, + 0, s8) + .customIf([=](const LegalityQuery &Query) { + LLT DstTy = Query.Types[0]; + LLT SrcTy = Query.Types[1]; + return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; + }) + .alwaysLegal(); - getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); + getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); // FP conversions - getActionDefinitionsBuilder(G_FPTRUNC) - .legalFor( - {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) - .clampMaxNumElements(0, s32, 2); - getActionDefinitionsBuilder(G_FPEXT) - .legalFor( - {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) - .clampMaxNumElements(0, s64, 2); + getActionDefinitionsBuilder(G_FPTRUNC) + .legalFor( + {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) + .clampMaxNumElements(0, s32, 2); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor( + {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) + .clampMaxNumElements(0, s64, 2); // Conversions getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) @@ -433,7 +433,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) .clampScalar(1, s32, s64) - .minScalarSameAs(1, 0) + .minScalarSameAs(1, 0) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0); @@ -445,8 +445,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) - .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) - .lowerIf(isVector(0)); + .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) + .lowerIf(isVector(0)); // Pointer-handling getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); @@ -576,8 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; }) // Any vectors left are the wrong size. Scalarize them. - .scalarize(0) - .scalarize(1); + .scalarize(0) + .scalarize(1); } getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) @@ -589,40 +589,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf([=](const LegalityQuery &Query) { const LLT &VecTy = Query.Types[1]; return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || - VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || - VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; - }) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - // We want to promote to <M x s1> to <M x s64> if that wouldn't - // cause the total vec size to be > 128b. 
- return Query.Types[1].getNumElements() <= 2; - }, - 0, s64) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 4; - }, - 0, s32) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 8; - }, - 0, s16) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 16; - }, - 0, s8) - .minScalarOrElt(0, s8); // Worst case, we need at least s8. + VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || + VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; + }) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + // We want to promote to <M x s1> to <M x s64> if that wouldn't + // cause the total vec size to be > 128b. + return Query.Types[1].getNumElements() <= 2; + }, + 0, s64) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 4; + }, + 0, s32) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 8; + }, + 0, s16) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 16; + }, + 0, s8) + .minScalarOrElt(0, s8); // Worst case, we need at least s8. getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) - .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); + .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalFor({{v8s8, s8}, - {v16s8, s8}, - {v4s16, s16}, + .legalFor({{v8s8, s8}, + {v16s8, s8}, + {v4s16, s16}, {v8s16, s16}, {v2s32, s32}, {v4s32, s32}, @@ -638,9 +638,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .minScalarSameAs(1, 0); - getActionDefinitionsBuilder(G_CTLZ) - .legalForCartesianProduct( - {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) + getActionDefinitionsBuilder(G_CTLZ) + .legalForCartesianProduct( + {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) .scalarize(1); getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) @@ -651,7 +651,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // to be the same size as the dest. if (DstTy != SrcTy) return false; - for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { + for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { if (DstTy == Ty) return true; } @@ -668,7 +668,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); - getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); + getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { return Query.Types[0] == p0 && Query.Types[1] == s64; @@ -676,20 +676,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - - getActionDefinitionsBuilder(G_ABS).lowerIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); - - getActionDefinitionsBuilder(G_VECREDUCE_FADD) - // We only have FADDP to do reduction-like operations. Lower the rest. 
- .legalFor({{s32, v2s32}, {s64, v2s64}}) - .lower(); - - getActionDefinitionsBuilder(G_VECREDUCE_ADD) - .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) - .lower(); - + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); + + getActionDefinitionsBuilder(G_ABS).lowerIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); + + getActionDefinitionsBuilder(G_VECREDUCE_FADD) + // We only have FADDP to do reduction-like operations. Lower the rest. + .legalFor({{s32, v2s32}, {s64, v2s64}}) + .lower(); + + getActionDefinitionsBuilder(G_VECREDUCE_ADD) + .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) + .lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -714,63 +714,63 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); case TargetOpcode::G_GLOBAL_VALUE: return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer); - case TargetOpcode::G_TRUNC: - return legalizeVectorTrunc(MI, Helper); + case TargetOpcode::G_TRUNC: + return legalizeVectorTrunc(MI, Helper); } llvm_unreachable("expected switch to return"); } -static void extractParts(Register Reg, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, - SmallVectorImpl<Register> &VRegs) { - for (int I = 0; I < NumParts; ++I) - VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); - MIRBuilder.buildUnmerge(VRegs, Reg); -} - -bool AArch64LegalizerInfo::legalizeVectorTrunc( - MachineInstr &MI, LegalizerHelper &Helper) const { - MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - // Similar to how operand splitting is done in SelectiondDAG, we can handle - // %res(v8s8) = G_TRUNC %in(v8s32) by generating: - // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) - // %lo16(<4 x s16>) = G_TRUNC %inlo - // %hi16(<4 x s16>) = G_TRUNC %inhi - // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 - // %res(<8 x s8>) = G_TRUNC %in16 - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - assert(isPowerOf2_32(DstTy.getSizeInBits()) && - isPowerOf2_32(SrcTy.getSizeInBits())); - - // Split input type. - LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); - // First, split the source into two smaller vectors. - SmallVector<Register, 2> SplitSrcs; - extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); - - // Truncate the splits into intermediate narrower elements. 
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); - for (unsigned I = 0; I < SplitSrcs.size(); ++I) - SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); - - auto Concat = MIRBuilder.buildConcatVectors( - DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); - - Helper.Observer.changingInstr(MI); - MI.getOperand(1).setReg(Concat.getReg(0)); - Helper.Observer.changedInstr(MI); - return true; -} - -bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, - GISelChangeObserver &Observer) const { +static void extractParts(Register Reg, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, + SmallVectorImpl<Register> &VRegs) { + for (int I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + MIRBuilder.buildUnmerge(VRegs, Reg); +} + +bool AArch64LegalizerInfo::legalizeVectorTrunc( + MachineInstr &MI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // Similar to how operand splitting is done in SelectiondDAG, we can handle + // %res(v8s8) = G_TRUNC %in(v8s32) by generating: + // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) + // %lo16(<4 x s16>) = G_TRUNC %inlo + // %hi16(<4 x s16>) = G_TRUNC %inhi + // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 + // %res(<8 x s8>) = G_TRUNC %in16 + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + assert(isPowerOf2_32(DstTy.getSizeInBits()) && + isPowerOf2_32(SrcTy.getSizeInBits())); + + // Split input type. + LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); + // First, split the source into two smaller vectors. + SmallVector<Register, 2> SplitSrcs; + extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); + + // Truncate the splits into intermediate narrower elements. + LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); + for (unsigned I = 0; I < SplitSrcs.size(); ++I) + SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); + + auto Concat = MIRBuilder.buildConcatVectors( + DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); + + Helper.Observer.changingInstr(MI); + MI.getOperand(1).setReg(Concat.getReg(0)); + Helper.Observer.changedInstr(MI); + return true; +} + +bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE); // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP + // G_ADD_LOW instructions. @@ -792,27 +792,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( // Set the regclass on the dest reg too. MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so - // by creating a MOVK that sets bits 48-63 of the register to (global address - // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to - // prevent an incorrect tag being generated during relocation when the the - // global appears before the code section. Without the offset, a global at - // `0x0f00'0000'0000'1000` (i.e. 
at `0x1000` with tag `0xf`) that's referenced - // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = - // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` - // instead of `0xf`. - // This assumes that we're in the small code model so we can assume a binary - // size of <= 4GB, which makes the untagged PC relative offset positive. The - // binary must also be loaded into address range [0, 2^48). Both of these - // properties need to be ensured at runtime when using tagged addresses. - if (OpFlags & AArch64II::MO_TAGGED) { - ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) - .addGlobalAddress(GV, 0x100000000, - AArch64II::MO_PREL | AArch64II::MO_G3) - .addImm(48); - MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - } - + // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so + // by creating a MOVK that sets bits 48-63 of the register to (global address + // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to + // prevent an incorrect tag being generated during relocation when the the + // global appears before the code section. Without the offset, a global at + // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced + // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = + // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` + // instead of `0xf`. + // This assumes that we're in the small code model so we can assume a binary + // size of <= 4GB, which makes the untagged PC relative offset positive. The + // binary must also be loaded into address range [0, 2^48). Both of these + // properties need to be ensured at runtime when using tagged addresses. + if (OpFlags & AArch64II::MO_TAGGED) { + ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) + .addGlobalAddress(GV, 0x100000000, + AArch64II::MO_PREL | AArch64II::MO_G3) + .addImm(48); + MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + } + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); @@ -820,8 +820,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( return true; } -bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI) const { return true; } @@ -838,13 +838,13 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (!VRegAndVal) return true; // Check the shift amount is in range for an immediate form. - int64_t Amount = VRegAndVal->Value.getSExtValue(); + int64_t Amount = VRegAndVal->Value.getSExtValue(); if (Amount > 31) return true; // This will have to remain a register variant. 
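// Illustrative sketch (not from this patch): for a 32-bit shift whose amount is
// a small constant, e.g.
//   %sh:_(s32) = G_ASHR %x:_(s32), %three:_(s32)
// the code below rebuilds the amount as a 64-bit constant,
//   %c:_(s64)  = G_CONSTANT i64 3
//   %sh:_(s32) = G_ASHR %x:_(s32), %c:_(s64)
// so the immediate-form selection patterns can match; amounts above 31 were
// left untouched above and keep the register form.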
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); - Observer.changingInstr(MI); + Observer.changingInstr(MI); MI.getOperand(2).setReg(ExtCst.getReg(0)); - Observer.changedInstr(MI); + Observer.changedInstr(MI); return true; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 8217e37c85..c22cb26608 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" namespace llvm { @@ -46,7 +46,7 @@ private: bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; - bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index fdd04cb77f..bf3190ce93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -1,22 +1,22 @@ -//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// +//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization combines on generic MachineInstrs. -/// -/// The combines here must preserve instruction legality. -/// -/// Lowering combines (e.g. pseudo matching) should be handled by -/// AArch64PostLegalizerLowering. -/// -/// Combines which don't rely on instruction legality should go in the -/// AArch64PreLegalizerCombiner. -/// +/// +/// \file +/// Post-legalization combines on generic MachineInstrs. +/// +/// The combines here must preserve instruction legality. +/// +/// Lowering combines (e.g. pseudo matching) should be handled by +/// AArch64PostLegalizerLowering. +/// +/// Combines which don't rely on instruction legality should go in the +/// AArch64PreLegalizerCombiner. 
+/// //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" @@ -24,12 +24,12 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -37,202 +37,202 @@ using namespace llvm; -/// This combine tries do what performExtractVectorEltCombine does in SDAG. -/// Rewrite for pairwise fadd pattern -/// (s32 (g_extract_vector_elt -/// (g_fadd (vXs32 Other) -/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) -/// -> -/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) -/// (g_extract_vector_elt (vXs32 Other) 1)) -bool matchExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - Register Src1 = MI.getOperand(1).getReg(); - Register Src2 = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); - if (!Cst || Cst->Value != 0) +/// This combine tries do what performExtractVectorEltCombine does in SDAG. +/// Rewrite for pairwise fadd pattern +/// (s32 (g_extract_vector_elt +/// (g_fadd (vXs32 Other) +/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) +/// -> +/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) +/// (g_extract_vector_elt (vXs32 Other) 1)) +bool matchExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); + if (!Cst || Cst->Value != 0) return false; - // SDAG also checks for FullFP16, but this looks to be beneficial anyway. + // SDAG also checks for FullFP16, but this looks to be beneficial anyway. - // Now check for an fadd operation. TODO: expand this for integer add? - auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); - if (!FAddMI) + // Now check for an fadd operation. TODO: expand this for integer add? + auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); + if (!FAddMI) return false; - // If we add support for integer add, must restrict these types to just s64. - unsigned DstSize = DstTy.getSizeInBits(); - if (DstSize != 16 && DstSize != 32 && DstSize != 64) + // If we add support for integer add, must restrict these types to just s64. 
+ unsigned DstSize = DstTy.getSizeInBits(); + if (DstSize != 16 && DstSize != 32 && DstSize != 64) return false; - Register Src1Op1 = FAddMI->getOperand(1).getReg(); - Register Src1Op2 = FAddMI->getOperand(2).getReg(); - MachineInstr *Shuffle = - getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); - MachineInstr *Other = MRI.getVRegDef(Src1Op1); - if (!Shuffle) { - Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); - Other = MRI.getVRegDef(Src1Op2); + Register Src1Op1 = FAddMI->getOperand(1).getReg(); + Register Src1Op2 = FAddMI->getOperand(2).getReg(); + MachineInstr *Shuffle = + getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); + MachineInstr *Other = MRI.getVRegDef(Src1Op1); + if (!Shuffle) { + Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); + Other = MRI.getVRegDef(Src1Op2); } - // We're looking for a shuffle that moves the second element to index 0. - if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && - Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { - std::get<0>(MatchInfo) = TargetOpcode::G_FADD; - std::get<1>(MatchInfo) = DstTy; - std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); + // We're looking for a shuffle that moves the second element to index 0. + if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && + Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { + std::get<0>(MatchInfo) = TargetOpcode::G_FADD; + std::get<1>(MatchInfo) = DstTy; + std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); return true; } return false; } -bool applyExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - unsigned Opc = std::get<0>(MatchInfo); - assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); - // We want to generate two extracts of elements 0 and 1, and add them. - LLT Ty = std::get<1>(MatchInfo); - Register Src = std::get<2>(MatchInfo); - LLT s64 = LLT::scalar(64); - B.setInstrAndDebugLoc(MI); - auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); - auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); - B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); - MI.eraseFromParent(); +bool applyExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + unsigned Opc = std::get<0>(MatchInfo); + assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); + // We want to generate two extracts of elements 0 and 1, and add them. + LLT Ty = std::get<1>(MatchInfo); + Register Src = std::get<2>(MatchInfo); + LLT s64 = LLT::scalar(64); + B.setInstrAndDebugLoc(MI); + auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); + auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); + B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); + MI.eraseFromParent(); return true; } -static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - unsigned Opc = MRI.getVRegDef(R)->getOpcode(); - return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; +static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. 
+ unsigned Opc = MRI.getVRegDef(R)->getOpcode(); + return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; } -static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; +static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. + return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; } -bool matchAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - assert(MI.getOpcode() == TargetOpcode::G_MUL); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - Register Dst = MI.getOperand(0).getReg(); - const LLT Ty = MRI.getType(LHS); - - // The below optimizations require a constant RHS. - auto Const = getConstantVRegValWithLookThrough(RHS, MRI); - if (!Const) +bool matchAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + assert(MI.getOpcode() == TargetOpcode::G_MUL); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(LHS); + + // The below optimizations require a constant RHS. + auto Const = getConstantVRegValWithLookThrough(RHS, MRI); + if (!Const) return false; - const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); - // The following code is ported from AArch64ISelLowering. - // Multiplication of a power of two plus/minus one can be done more - // cheaply as as shift+add/sub. For now, this is true unilaterally. If - // future CPUs have a cheaper MADD instruction, this may need to be - // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and - // 64-bit is 5 cycles, so this is always a win. - // More aggressively, some multiplications N0 * C can be lowered to - // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, - // e.g. 6=3*2=(2+1)*2. - // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 - // which equals to (1+2)*16-(1+2). - // TrailingZeroes is used to test if the mul can be lowered to - // shift+add+shift. - unsigned TrailingZeroes = ConstValue.countTrailingZeros(); - if (TrailingZeroes) { - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into smul or umul. - if (MRI.hasOneNonDBGUse(LHS) && - (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) - return false; - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into madd or msub. - if (MRI.hasOneNonDBGUse(Dst)) { - MachineInstr &UseMI = *MRI.use_instr_begin(Dst); - if (UseMI.getOpcode() == TargetOpcode::G_ADD || - UseMI.getOpcode() == TargetOpcode::G_SUB) - return false; - } - } - // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub - // and shift+add+shift. - APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); - - unsigned ShiftAmt, AddSubOpc; - // Is the shifted value the LHS operand of the add/sub? - bool ShiftValUseIsLHS = true; - // Do we need to negate the result? 
- bool NegateResult = false; - - if (ConstValue.isNonNegative()) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - // (mul x, 2^N - 1) => (sub (shl x, N), x) - // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) - APInt SCVMinus1 = ShiftedConstValue - 1; - APInt CVPlus1 = ConstValue + 1; - if (SCVMinus1.isPowerOf2()) { - ShiftAmt = SCVMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - } else if (CVPlus1.isPowerOf2()) { - ShiftAmt = CVPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - } else - return false; - } else { - // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) - // (mul x, -(2^N + 1)) => - (add (shl x, N), x) - APInt CVNegPlus1 = -ConstValue + 1; - APInt CVNegMinus1 = -ConstValue - 1; - if (CVNegPlus1.isPowerOf2()) { - ShiftAmt = CVNegPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - ShiftValUseIsLHS = false; - } else if (CVNegMinus1.isPowerOf2()) { - ShiftAmt = CVNegMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - NegateResult = true; - } else - return false; - } - - if (NegateResult && TrailingZeroes) + const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); + // The following code is ported from AArch64ISelLowering. + // Multiplication of a power of two plus/minus one can be done more + // cheaply as as shift+add/sub. For now, this is true unilaterally. If + // future CPUs have a cheaper MADD instruction, this may need to be + // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and + // 64-bit is 5 cycles, so this is always a win. + // More aggressively, some multiplications N0 * C can be lowered to + // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, + // e.g. 6=3*2=(2+1)*2. + // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 + // which equals to (1+2)*16-(1+2). + // TrailingZeroes is used to test if the mul can be lowered to + // shift+add+shift. + unsigned TrailingZeroes = ConstValue.countTrailingZeros(); + if (TrailingZeroes) { + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into smul or umul. + if (MRI.hasOneNonDBGUse(LHS) && + (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) + return false; + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into madd or msub. + if (MRI.hasOneNonDBGUse(Dst)) { + MachineInstr &UseMI = *MRI.use_instr_begin(Dst); + if (UseMI.getOpcode() == TargetOpcode::G_ADD || + UseMI.getOpcode() == TargetOpcode::G_SUB) + return false; + } + } + // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub + // and shift+add+shift. + APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); + + unsigned ShiftAmt, AddSubOpc; + // Is the shifted value the LHS operand of the add/sub? + bool ShiftValUseIsLHS = true; + // Do we need to negate the result? 
+ bool NegateResult = false; + + if (ConstValue.isNonNegative()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + // (mul x, 2^N - 1) => (sub (shl x, N), x) + // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) + APInt SCVMinus1 = ShiftedConstValue - 1; + APInt CVPlus1 = ConstValue + 1; + if (SCVMinus1.isPowerOf2()) { + ShiftAmt = SCVMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + } else if (CVPlus1.isPowerOf2()) { + ShiftAmt = CVPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + } else + return false; + } else { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt CVNegPlus1 = -ConstValue + 1; + APInt CVNegMinus1 = -ConstValue - 1; + if (CVNegPlus1.isPowerOf2()) { + ShiftAmt = CVNegPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + ShiftValUseIsLHS = false; + } else if (CVNegMinus1.isPowerOf2()) { + ShiftAmt = CVNegMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + NegateResult = true; + } else + return false; + } + + if (NegateResult && TrailingZeroes) return false; - ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { - auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); - auto ShiftedVal = B.buildShl(Ty, LHS, Shift); - - Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; - Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); - auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); - assert(!(NegateResult && TrailingZeroes) && - "NegateResult and TrailingZeroes cannot both be true for now."); - // Negate the result. - if (NegateResult) { - B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); - return; - } - // Shift the result. - if (TrailingZeroes) { - B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); - return; - } - B.buildCopy(DstReg, Res.getReg(0)); - }; + ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { + auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); + auto ShiftedVal = B.buildShl(Ty, LHS, Shift); + + Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; + Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); + auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); + assert(!(NegateResult && TrailingZeroes) && + "NegateResult and TrailingZeroes cannot both be true for now."); + // Negate the result. + if (NegateResult) { + B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); + return; + } + // Shift the result. 
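// Illustrative worked example (not from this patch): for %d = G_MUL %x, 6 the
// constant is 6 = (2 + 1) * 2, so TrailingZeroes = 1 and ShiftedConstValue = 3.
// SCVMinus1 = 2 is a power of two, giving ShiftAmt = 1 and AddSubOpc = G_ADD,
// with NegateResult left false. The apply lambda therefore emits
//   %t   = G_SHL %x, 1     ; x * 2
//   %sum = G_ADD %t, %x    ; x * 3
//   %d   = G_SHL %sum, 1   ; x * 6
// i.e. ((x << 1) + x) << 1, two shifts and an add in place of the multiply.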
+ if (TrailingZeroes) { + B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); + return; + } + B.buildCopy(DstReg, Res.getReg(0)); + }; return true; } -bool applyAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - B.setInstrAndDebugLoc(MI); - ApplyFn(B, MI.getOperand(0).getReg()); +bool applyAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + B.setInstrAndDebugLoc(MI); + ApplyFn(B, MI.getOperand(0).getReg()); MI.eraseFromParent(); return true; } @@ -348,7 +348,7 @@ INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE, false) namespace llvm { -FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { return new AArch64PostLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index a06ff4b541..0447c3e8a0 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -1,704 +1,704 @@ -//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization lowering for instructions. -/// -/// This is used to offload pattern matching from the selector. -/// -/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually -/// a G_ZIP, G_UZP, etc. -/// -/// General optimization combines should be handled by either the -/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. -/// -//===----------------------------------------------------------------------===// - -#include "AArch64TargetMachine.h" -#include "AArch64GlobalISelUtils.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/GlobalISel/Combiner.h" -#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" -#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-postlegalizer-lowering" - -using namespace llvm; -using namespace MIPatternMatch; -using namespace AArch64GISelUtils; - -/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. -/// -/// Used for matching target-supported shuffles before codegen. -struct ShuffleVectorPseudo { - unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) - Register Dst; ///< Destination register. - SmallVector<SrcOp, 2> SrcOps; ///< Source registers. 
- ShuffleVectorPseudo(unsigned Opc, Register Dst, - std::initializer_list<SrcOp> SrcOps) - : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; - ShuffleVectorPseudo() {} -}; - -/// Check if a vector shuffle corresponds to a REV instruction with the -/// specified blocksize. -static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, - unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); - - unsigned BlockElts = M[0] + 1; - - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSize; - - if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - // Ignore undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != - (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. -/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. -static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || - (M[i + 1] >= 0 && - static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) - return false; - } - return true; -} - -/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector -/// sources of the shuffle are different. -static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, - unsigned NumElts) { - // Look for the first non-undef element. - auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); - if (FirstRealElt == M.end()) - return None; - - // Use APInt to handle overflow when calculating expected element. - unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); - APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); - - // The following shuffle indices must be the successive elements after the - // first real element. - if (any_of( - make_range(std::next(FirstRealElt), M.end()), - [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) - return None; - - // The index of an EXT is the first element if it is not UNDEF. - // Watch out for the beginning UNDEFs. The EXT index should be the expected - // value of the first element. E.g. - // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. - // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. - // ExpectedElt is the last mask index plus 1. - uint64_t Imm = ExpectedElt.getZExtValue(); - bool ReverseExt = false; - - // There are two difference cases requiring to reverse input vectors. - // For example, for vector <4 x i32> we have the following cases, - // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) - // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) - // For both cases, we finally use mask <5, 6, 7, 0>, which requires - // to reverse two input vectors. - if (Imm < NumElts) - ReverseExt = true; - else - Imm -= NumElts; - return std::make_pair(ReverseExt, Imm); -} - -/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. -/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. 
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - // Skip undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) - return false; - } - return true; -} - -/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. -/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. -static bool isZipMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - - // 0 means use ZIP1, 1 means use ZIP2. - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || - (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) - return false; - Idx += 1; - } - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a -/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. -static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(Dst); - unsigned EltSize = Ty.getScalarSizeInBits(); - - // Element size for a rev cannot be 64. - if (EltSize == 64) - return false; - - unsigned NumElts = Ty.getNumElements(); - - // Try to produce G_REV64 - if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { - MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); - return true; - } - - // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. - // This should be identical to above, but with a constant 32 and constant - // 16. - return false; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_TRN1 or G_TRN2 instruction. -static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_UZP1 or G_UZP2 instruction. -/// -/// \param [in] MI - The shuffle vector instruction. -/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. -static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? 
AArch64::G_UZP1 : AArch64::G_UZP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isZipMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - if (Lane != 0) - return false; - - // Try to match a vector splat operation into a dup instruction. - // We're looking for this pattern: - // - // %scalar:gpr(s64) = COPY $x0 - // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF - // %cst0:gpr(s32) = G_CONSTANT i32 0 - // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) - // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) - // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) - // - // ...into: - // %splat = G_DUP %scalar - - // Begin matching the insert. - auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, - MI.getOperand(1).getReg(), MRI); - if (!InsMI) - return false; - // Match the undef vector operand. - if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), - MRI)) - return false; - - // Match the index constant 0. - if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) - return false; - - MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), - {InsMI->getOperand(2).getReg()}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(Lane >= 0 && "Expected positive lane?"); - // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the - // lane's definition directly. - auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, - MI.getOperand(1).getReg(), MRI); - if (!BuildVecMI) - return false; - Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); - MatchInfo = - ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); - return true; -} - -static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - auto MaybeLane = getSplatIndex(MI); - if (!MaybeLane) - return false; - int Lane = *MaybeLane; - // If this is undef splat, generate it via "just" vdup, if possible. 
- if (Lane < 0) - Lane = 0; - if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) - return true; - if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) - return true; - return false; -} - -static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Dst = MI.getOperand(0).getReg(); - auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), - MRI.getType(Dst).getNumElements()); - if (!ExtInfo) - return false; - bool ReverseExt; - uint64_t Imm; - std::tie(ReverseExt, Imm) = *ExtInfo; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - if (ReverseExt) - std::swap(V1, V2); - uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; - Imm *= ExtFactor; - MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. -/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. -static bool applyShuffleVectorPseudo(MachineInstr &MI, - ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); - MI.eraseFromParent(); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. -/// Special-cased because the constant operand must be emitted as a G_CONSTANT -/// for the imported tablegen patterns to work. -static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - // Tablegen patterns expect an i32 G_CONSTANT as the final op. - auto Cst = - MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, - {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); - MI.eraseFromParent(); - return true; -} - -/// isVShiftRImm - Check if this is a valid vector for the immediate -/// operand of a vector shift right operation. The value must be in the range: -/// 1 <= Value <= ElementBits for a right shift. -static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, - int64_t &Cnt) { - assert(Ty.isVector() && "vector shift count is not a vector type"); - MachineInstr *MI = MRI.getVRegDef(Reg); - auto Cst = getBuildVectorConstantSplat(*MI, MRI); - if (!Cst) - return false; - Cnt = *Cst; - int64_t ElementBits = Ty.getScalarSizeInBits(); - return Cnt >= 1 && Cnt <= ElementBits; -} - -/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. -static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR || - MI.getOpcode() == TargetOpcode::G_LSHR); - LLT Ty = MRI.getType(MI.getOperand(1).getReg()); - if (!Ty.isVector()) - return false; - return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); -} - -static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - unsigned Opc = MI.getOpcode(); - assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); - unsigned NewOpc = - Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; - MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); - MI.eraseFromParent(); - return true; -} - -/// Determine if it is possible to modify the \p RHS and predicate \p P of a -/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. 
-/// -/// \returns A pair containing the updated immediate and predicate which may -/// be used to optimize the instruction. -/// -/// \note This assumes that the comparison has been legalized. -Optional<std::pair<uint64_t, CmpInst::Predicate>> -tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, - const MachineRegisterInfo &MRI) { - const auto &Ty = MRI.getType(RHS); - if (Ty.isVector()) - return None; - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); - - // If the RHS is not a constant, or the RHS is already a valid arithmetic - // immediate, then there is nothing to change. - auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); - if (!ValAndVReg) - return None; - uint64_t C = ValAndVReg->Value.getZExtValue(); - if (isLegalArithImmed(C)) - return None; - - // We have a non-arithmetic immediate. Check if adjusting the immediate and - // adjusting the predicate will result in a legal arithmetic immediate. - switch (P) { - default: - return None; - case CmpInst::ICMP_SLT: - case CmpInst::ICMP_SGE: - // Check for - // - // x slt c => x sle c - 1 - // x sge c => x sgt c - 1 - // - // When c is not the smallest possible negative number. - if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || - (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) - return None; - P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; - C -= 1; - break; - case CmpInst::ICMP_ULT: - case CmpInst::ICMP_UGE: - // Check for - // - // x ult c => x ule c - 1 - // x uge c => x ugt c - 1 - // - // When c is not zero. - if (C == 0) - return None; - P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; - C -= 1; - break; - case CmpInst::ICMP_SLE: - case CmpInst::ICMP_SGT: - // Check for - // - // x sle c => x slt c + 1 - // x sgt c => s sge c + 1 - // - // When c is not the largest possible signed integer. - if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || - (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) - return None; - P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; - C += 1; - break; - case CmpInst::ICMP_ULE: - case CmpInst::ICMP_UGT: - // Check for - // - // x ule c => x ult c + 1 - // x ugt c => s uge c + 1 - // - // When c is not the largest possible unsigned integer. - if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || - (Size == 64 && C == UINT64_MAX)) - return None; - P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; - C += 1; - break; - } - - // Check if the new constant is valid, and return the updated constant and - // predicate if it is. - if (Size == 32) - C = static_cast<uint32_t>(C); - if (!isLegalArithImmed(C)) - return None; - return {{C, P}}; -} - -/// Determine whether or not it is possible to update the RHS and predicate of -/// a G_ICMP instruction such that the RHS will be selected as an arithmetic -/// immediate. -/// -/// \p MI - The G_ICMP instruction -/// \p MatchInfo - The new RHS immediate and predicate on success -/// -/// See tryAdjustICmpImmAndPred for valid transformations. 
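/// Illustrative example (not from this patch): `x slt 4097` cannot use the
/// arithmetic-immediate form, since 4097 does not fit the 12-bit, optionally
/// shifted-by-12 encoding, but rewriting it as `x sle 4096` does, because
/// 4096 == 1 << 12 is encodable. tryAdjustICmpImmAndPred performs this kind of
/// predicate-plus-constant adjustment when the original constant is not
/// directly encodable but the adjusted one is.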
-bool matchAdjustICmpImmAndPred( - MachineInstr &MI, const MachineRegisterInfo &MRI, - std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ICMP); - Register RHS = MI.getOperand(3).getReg(); - auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { - MatchInfo = *MaybeNewImmAndPred; - return true; - } - return false; -} - -bool applyAdjustICmpImmAndPred( - MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, - MachineIRBuilder &MIB, GISelChangeObserver &Observer) { - MIB.setInstrAndDebugLoc(MI); - MachineOperand &RHS = MI.getOperand(3); - MachineRegisterInfo &MRI = *MIB.getMRI(); - auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), - MatchInfo.first); - Observer.changingInstr(MI); - RHS.setReg(Cst->getOperand(0).getReg()); - MI.getOperand(1).setPredicate(MatchInfo.second); - Observer.changedInstr(MI); - return true; -} - -bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Src1Reg = MI.getOperand(1).getReg(); - const LLT SrcTy = MRI.getType(Src1Reg); - const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto LaneIdx = getSplatIndex(MI); - if (!LaneIdx) - return false; - - // The lane idx should be within the first source vector. - if (*LaneIdx >= SrcTy.getNumElements()) - return false; - - if (DstTy != SrcTy) - return false; - - LLT ScalarTy = SrcTy.getElementType(); - unsigned ScalarSize = ScalarTy.getSizeInBits(); - - unsigned Opc = 0; - switch (SrcTy.getNumElements()) { - case 2: - if (ScalarSize == 64) - Opc = AArch64::G_DUPLANE64; - break; - case 4: - if (ScalarSize == 32) - Opc = AArch64::G_DUPLANE32; - break; - case 8: - if (ScalarSize == 16) - Opc = AArch64::G_DUPLANE16; - break; - case 16: - if (ScalarSize == 8) - Opc = AArch64::G_DUPLANE8; - break; - default: - break; - } - if (!Opc) - return false; - - MatchInfo.first = Opc; - MatchInfo.second = *LaneIdx; - return true; -} - -bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - B.setInstrAndDebugLoc(MI); - auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); - B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, - {MI.getOperand(1).getReg(), Lane}); - MI.eraseFromParent(); - return true; -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H - -class AArch64PostLegalizerLoweringInfo : public CombinerInfo { -public: - AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; - - AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) - : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, - MinSize) { - if (!GeneratedRuleCfg.parseCommandLineOption()) - report_fatal_error("Invalid rule identifier"); - } - - virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, - MachineIRBuilder &B) const override; -}; - -bool 
AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, - MachineInstr &MI, - MachineIRBuilder &B) const { - CombinerHelper Helper(Observer, B); - AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); - return Generated.tryCombineAll(Observer, MI, B, Helper); -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP - -class AArch64PostLegalizerLowering : public MachineFunctionPass { -public: - static char ID; - - AArch64PostLegalizerLowering(); - - StringRef getPassName() const override { - return "AArch64PostLegalizerLowering"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; -} // end anonymous namespace - -void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() - : MachineFunctionPass(ID) { - initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); -} - -bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Legalized) && - "Expected a legalized function?"); - auto *TPC = &getAnalysis<TargetPassConfig>(); - const Function &F = MF.getFunction(); - AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); -} - -char AArch64PostLegalizerLowering::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostLegalizerLowering() { - return new AArch64PostLegalizerLowering(); -} -} // end namespace llvm +//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Post-legalization lowering for instructions. +/// +/// This is used to offload pattern matching from the selector. +/// +/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually +/// a G_ZIP, G_UZP, etc. +/// +/// General optimization combines should be handled by either the +/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. 
+/// +//===----------------------------------------------------------------------===// + +#include "AArch64TargetMachine.h" +#include "AArch64GlobalISelUtils.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-postlegalizer-lowering" + +using namespace llvm; +using namespace MIPatternMatch; +using namespace AArch64GISelUtils; + +/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. +/// +/// Used for matching target-supported shuffles before codegen. +struct ShuffleVectorPseudo { + unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) + Register Dst; ///< Destination register. + SmallVector<SrcOp, 2> SrcOps; ///< Source registers. + ShuffleVectorPseudo(unsigned Opc, Register Dst, + std::initializer_list<SrcOp> SrcOps) + : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; + ShuffleVectorPseudo() {} +}; + +/// Check if a vector shuffle corresponds to a REV instruction with the +/// specified blocksize. +static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, + unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); + + unsigned BlockElts = M[0] + 1; + + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSize; + + if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + // Ignore undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != + (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. +/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + +/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector +/// sources of the shuffle are different. +static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, + unsigned NumElts) { + // Look for the first non-undef element. + auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); + if (FirstRealElt == M.end()) + return None; + + // Use APInt to handle overflow when calculating expected element. + unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); + APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); + + // The following shuffle indices must be the successive elements after the + // first real element. 
+ if (any_of( + make_range(std::next(FirstRealElt), M.end()), + [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) + return None; + + // The index of an EXT is the first element if it is not UNDEF. + // Watch out for the beginning UNDEFs. The EXT index should be the expected + // value of the first element. E.g. + // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. + // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. + // ExpectedElt is the last mask index plus 1. + uint64_t Imm = ExpectedElt.getZExtValue(); + bool ReverseExt = false; + + // There are two difference cases requiring to reverse input vectors. + // For example, for vector <4 x i32> we have the following cases, + // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) + // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) + // For both cases, we finally use mask <5, 6, 7, 0>, which requires + // to reverse two input vectors. + if (Imm < NumElts) + ReverseExt = true; + else + Imm -= NumElts; + return std::make_pair(ReverseExt, Imm); +} + +/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. +/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. +static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + // Skip undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) + return false; + } + return true; +} + +/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. +/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. +static bool isZipMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + + // 0 means use ZIP1, 1 means use ZIP2. + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || + (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) + return false; + Idx += 1; + } + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a +/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. +static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned EltSize = Ty.getScalarSizeInBits(); + + // Element size for a rev cannot be 64. + if (EltSize == 64) + return false; + + unsigned NumElts = Ty.getNumElements(); + + // Try to produce G_REV64 + if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { + MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); + return true; + } + + // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. + // This should be identical to above, but with a constant 32 and constant + // 16. + return false; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. 
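The mask predicates above (isREVMask, isTRNMask, isUZPMask, isZipMask) are pure index arithmetic, so they can be exercised outside of LLVM. The following is a standalone sketch that mirrors two of them over plain std::vector<int> masks, with -1 standing for an undef lane; the function names and the sample masks are invented for the illustration and are not part of the patch.

// Standalone illustration of the shuffle-mask predicates shown above.
// -1 marks an undef lane, as in a G_SHUFFLE_VECTOR mask.
#include <cstdio>
#include <vector>

// Mirrors isZipMask: even/odd positions read lane Idx and lane Idx + NumElts.
static bool isZipMaskSketch(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
    ++Idx;
  }
  return true;
}

// Mirrors isUZPMask: lane i must read element 2*i + WhichResult.
static bool isUZPMaskSketch(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // undef lane, be optimistic
    if ((unsigned)M[i] != 2 * i + WhichResult)
      return false;
  }
  return true;
}

int main() {
  unsigned Which = 0;
  std::vector<int> Zip1 = {0, 4, 1, 5}; // interleave of two 4-element inputs
  bool IsZip = isZipMaskSketch(Zip1, 4, Which);
  std::printf("zip mask: %d (which=%u)\n", IsZip, Which);   // 1, ZIP1

  std::vector<int> Uzp2 = {1, 3, 5, 7}; // odd lanes of the concatenation
  bool IsUzp = isUZPMaskSketch(Uzp2, 4, Which);
  std::printf("uzp mask: %d (which=%u)\n", IsUzp, Which);   // 1, UZP2
  return 0;
}

The matchers below (matchTRN, matchUZP, matchZip) then only translate the accepted mask and WhichResult into the corresponding AArch64 pseudo opcode.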
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_UZP1 or G_UZP2 instruction. +/// +/// \param [in] MI - The shuffle vector instruction. +/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. +static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isZipMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// Helper function for matchDup. +static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + if (Lane != 0) + return false; + + // Try to match a vector splat operation into a dup instruction. + // We're looking for this pattern: + // + // %scalar:gpr(s64) = COPY $x0 + // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF + // %cst0:gpr(s32) = G_CONSTANT i32 0 + // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) + // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) + // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) + // + // ...into: + // %splat = G_DUP %scalar + + // Begin matching the insert. + auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, + MI.getOperand(1).getReg(), MRI); + if (!InsMI) + return false; + // Match the undef vector operand. + if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), + MRI)) + return false; + + // Match the index constant 0. + if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) + return false; + + MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), + {InsMI->getOperand(2).getReg()}); + return true; +} + +/// Helper function for matchDup. 
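getExtMask above reduces an EXT-style shuffle to a rotation of the two concatenated inputs: find the first defined lane, require every following defined lane to be its successor, then derive the rotate index and whether the inputs must be swapped. The sketch below restates that arithmetic with plain unsigned modulo math in place of APInt; the helper name is invented, and matchEXT further down additionally scales the index by the element size in bytes.

// Standalone sketch of the EXT-mask computation used by getExtMask above.
#include <cstdio>
#include <vector>

static bool getExtMaskSketch(const std::vector<int> &M, unsigned NumElts,
                             bool &ReverseExt, unsigned &Imm) {
  // Find the first non-undef lane.
  unsigned First = 0;
  while (First < M.size() && M[First] < 0)
    ++First;
  if (First == M.size())
    return false;

  // Every later defined lane must continue the sequence, modulo 2 * NumElts
  // (the two source vectors concatenated). This mirrors the any_of loop.
  unsigned Expected = (unsigned)M[First] + 1;
  for (unsigned i = First + 1; i < M.size(); ++i) {
    if (M[i] >= 0 && (unsigned)M[i] != Expected % (2 * NumElts))
      return false;
    ++Expected;
  }

  // As in the original: the value "last mask index + 1" becomes the EXT
  // index; if it lands in the first input, the two inputs are swapped.
  Imm = Expected % (2 * NumElts);
  ReverseExt = false;
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return true;
}

int main() {
  bool Rev = false;
  unsigned Imm = 0;
  // <-1, -1, 7, 0> on a <4 x s32> shuffle: EXT of the swapped inputs, index 1.
  std::vector<int> M = {-1, -1, 7, 0};
  if (getExtMaskSketch(M, 4, Rev, Imm))
    std::printf("reverse=%d imm=%u (bytes for s32: %u)\n", Rev, Imm, Imm * 4u);
  return 0;
}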
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(Lane >= 0 && "Expected positive lane?"); + // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the + // lane's definition directly. + auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, + MI.getOperand(1).getReg(), MRI); + if (!BuildVecMI) + return false; + Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); + MatchInfo = + ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); + return true; +} + +static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + auto MaybeLane = getSplatIndex(MI); + if (!MaybeLane) + return false; + int Lane = *MaybeLane; + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane < 0) + Lane = 0; + if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) + return true; + if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) + return true; + return false; +} + +static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Dst = MI.getOperand(0).getReg(); + auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), + MRI.getType(Dst).getNumElements()); + if (!ExtInfo) + return false; + bool ReverseExt; + uint64_t Imm; + std::tie(ReverseExt, Imm) = *ExtInfo; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + if (ReverseExt) + std::swap(V1, V2); + uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; + Imm *= ExtFactor; + MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. +/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. +static bool applyShuffleVectorPseudo(MachineInstr &MI, + ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); + MI.eraseFromParent(); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. +/// Special-cased because the constant operand must be emitted as a G_CONSTANT +/// for the imported tablegen patterns to work. +static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + // Tablegen patterns expect an i32 G_CONSTANT as the final op. + auto Cst = + MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, + {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); + MI.eraseFromParent(); + return true; +} + +/// isVShiftRImm - Check if this is a valid vector for the immediate +/// operand of a vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits for a right shift. +static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, + int64_t &Cnt) { + assert(Ty.isVector() && "vector shift count is not a vector type"); + MachineInstr *MI = MRI.getVRegDef(Reg); + auto Cst = getBuildVectorConstantSplat(*MI, MRI); + if (!Cst) + return false; + Cnt = *Cst; + int64_t ElementBits = Ty.getScalarSizeInBits(); + return Cnt >= 1 && Cnt <= ElementBits; +} + +/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. 
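isVShiftRImm above only accepts a shift amount that is a constant splat whose value lies in the inclusive range [1, ElementBits]; matchVAshrLshrImm below then rewrites the generic shift into G_VASHR/G_VLSHR with that immediate. A minimal standalone sketch of those two checks follows; the splat detection is a simplified stand-in for getBuildVectorConstantSplat (the real helper also looks through copies and undef lanes), and all names are invented for the illustration.

// Standalone sketch of the checks behind isVShiftRImm above.
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for getBuildVectorConstantSplat: all lanes equal.
static bool getConstantSplatSketch(const std::vector<int64_t> &Lanes,
                                   int64_t &Out) {
  if (Lanes.empty())
    return false;
  for (int64_t L : Lanes)
    if (L != Lanes.front())
      return false;
  Out = Lanes.front();
  return true;
}

// A vector shift-right immediate must satisfy 1 <= Cnt <= ElementBits.
static bool isVShiftRImmSketch(const std::vector<int64_t> &Lanes,
                               unsigned ElementBits, int64_t &Cnt) {
  if (!getConstantSplatSketch(Lanes, Cnt))
    return false;
  return Cnt >= 1 && Cnt <= (int64_t)ElementBits;
}

int main() {
  int64_t Cnt = 0;
  // <4 x s32> shifted right by a splat of 31: valid immediate form.
  std::printf("%d\n", isVShiftRImmSketch({31, 31, 31, 31}, 32, Cnt)); // 1
  // A splat of 0 (or a non-splat amount) is rejected.
  std::printf("%d\n", isVShiftRImmSketch({0, 0, 0, 0}, 32, Cnt));     // 0
  return 0;
}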
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR || + MI.getOpcode() == TargetOpcode::G_LSHR); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (!Ty.isVector()) + return false; + return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); +} + +static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); + unsigned NewOpc = + Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; + MachineIRBuilder MIB(MI); + auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MI.eraseFromParent(); + return true; +} + +/// Determine if it is possible to modify the \p RHS and predicate \p P of a +/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. +/// +/// \returns A pair containing the updated immediate and predicate which may +/// be used to optimize the instruction. +/// +/// \note This assumes that the comparison has been legalized. +Optional<std::pair<uint64_t, CmpInst::Predicate>> +tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, + const MachineRegisterInfo &MRI) { + const auto &Ty = MRI.getType(RHS); + if (Ty.isVector()) + return None; + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); + + // If the RHS is not a constant, or the RHS is already a valid arithmetic + // immediate, then there is nothing to change. + auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); + if (!ValAndVReg) + return None; + uint64_t C = ValAndVReg->Value.getZExtValue(); + if (isLegalArithImmed(C)) + return None; + + // We have a non-arithmetic immediate. Check if adjusting the immediate and + // adjusting the predicate will result in a legal arithmetic immediate. + switch (P) { + default: + return None; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + // Check for + // + // x slt c => x sle c - 1 + // x sge c => x sgt c - 1 + // + // When c is not the smallest possible negative number. + if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || + (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) + return None; + P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; + C -= 1; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_UGE: + // Check for + // + // x ult c => x ule c - 1 + // x uge c => x ugt c - 1 + // + // When c is not zero. + if (C == 0) + return None; + P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; + C -= 1; + break; + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGT: + // Check for + // + // x sle c => x slt c + 1 + // x sgt c => s sge c + 1 + // + // When c is not the largest possible signed integer. + if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || + (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) + return None; + P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; + C += 1; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGT: + // Check for + // + // x ule c => x ult c + 1 + // x ugt c => s uge c + 1 + // + // When c is not the largest possible unsigned integer. + if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || + (Size == 64 && C == UINT64_MAX)) + return None; + P = (P == CmpInst::ICMP_ULE) ? 
CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; + C += 1; + break; + } + + // Check if the new constant is valid, and return the updated constant and + // predicate if it is. + if (Size == 32) + C = static_cast<uint32_t>(C); + if (!isLegalArithImmed(C)) + return None; + return {{C, P}}; +} + +/// Determine whether or not it is possible to update the RHS and predicate of +/// a G_ICMP instruction such that the RHS will be selected as an arithmetic +/// immediate. +/// +/// \p MI - The G_ICMP instruction +/// \p MatchInfo - The new RHS immediate and predicate on success +/// +/// See tryAdjustICmpImmAndPred for valid transformations. +bool matchAdjustICmpImmAndPred( + MachineInstr &MI, const MachineRegisterInfo &MRI, + std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + Register RHS = MI.getOperand(3).getReg(); + auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { + MatchInfo = *MaybeNewImmAndPred; + return true; + } + return false; +} + +bool applyAdjustICmpImmAndPred( + MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, + MachineIRBuilder &MIB, GISelChangeObserver &Observer) { + MIB.setInstrAndDebugLoc(MI); + MachineOperand &RHS = MI.getOperand(3); + MachineRegisterInfo &MRI = *MIB.getMRI(); + auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), + MatchInfo.first); + Observer.changingInstr(MI); + RHS.setReg(Cst->getOperand(0).getReg()); + MI.getOperand(1).setPredicate(MatchInfo.second); + Observer.changedInstr(MI); + return true; +} + +bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Src1Reg = MI.getOperand(1).getReg(); + const LLT SrcTy = MRI.getType(Src1Reg); + const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto LaneIdx = getSplatIndex(MI); + if (!LaneIdx) + return false; + + // The lane idx should be within the first source vector. 
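tryAdjustICmpImmAndPred above trades the predicate for a neighbouring immediate so that the RHS becomes a legal AArch64 arithmetic immediate. The sketch below works through the signed-less-than case only, with a legality rule assumed to be the usual AArch64 arithmetic-immediate form (a 12-bit value, optionally shifted left by 12); the helper names are invented, and the real code also handles the unsigned and greater-than variants plus 32-bit truncation.

// Standalone sketch: rewrite "x slt C" as "x sle C-1" when that makes the
// immediate encodable. 64-bit comparison assumed.
#include <cstdint>
#include <cstdio>

// Assumed encodability rule: 12-bit immediate, optionally shifted left by 12.
static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 24) == 0);
}

// Returns true and updates C / UseSLE when the rewrite applies.
static bool adjustSltImm(uint64_t &C, bool &UseSLE) {
  if (isLegalArithImmedSketch(C))
    return false;                 // already legal, nothing to do
  if ((int64_t)C == INT64_MIN)
    return false;                 // C - 1 would wrap around
  uint64_t NewC = C - 1;
  if (!isLegalArithImmedSketch(NewC))
    return false;                 // adjusting did not help
  C = NewC;
  UseSLE = true;                  // predicate becomes signed <=
  return true;
}

int main() {
  bool UseSLE = false;
  uint64_t C = 0x1001;            // not encodable: > 4095 with low bits set
  if (adjustSltImm(C, UseSLE))
    std::printf("use sle with imm 0x%llx\n", (unsigned long long)C); // 0x1000
  return 0;
}

The match/apply pair that follows simply stores the adjusted (immediate, predicate) pair and then rebuilds the G_ICMP's RHS as a fresh G_CONSTANT.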
+ if (*LaneIdx >= SrcTy.getNumElements()) + return false; + + if (DstTy != SrcTy) + return false; + + LLT ScalarTy = SrcTy.getElementType(); + unsigned ScalarSize = ScalarTy.getSizeInBits(); + + unsigned Opc = 0; + switch (SrcTy.getNumElements()) { + case 2: + if (ScalarSize == 64) + Opc = AArch64::G_DUPLANE64; + break; + case 4: + if (ScalarSize == 32) + Opc = AArch64::G_DUPLANE32; + break; + case 8: + if (ScalarSize == 16) + Opc = AArch64::G_DUPLANE16; + break; + case 16: + if (ScalarSize == 8) + Opc = AArch64::G_DUPLANE8; + break; + default: + break; + } + if (!Opc) + return false; + + MatchInfo.first = Opc; + MatchInfo.second = *LaneIdx; + return true; +} + +bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + B.setInstrAndDebugLoc(MI); + auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); + B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, + {MI.getOperand(1).getReg(), Lane}); + MI.eraseFromParent(); + return true; +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H + +class AArch64PostLegalizerLoweringInfo : public CombinerInfo { +public: + AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; + + AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, + MinSize) { + if (!GeneratedRuleCfg.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + CombinerHelper Helper(Observer, B); + AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); + return Generated.tryCombineAll(Observer, MI, B, Helper); +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP + +class AArch64PostLegalizerLowering : public MachineFunctionPass { +public: + static char ID; + + AArch64PostLegalizerLowering(); + + StringRef getPassName() const override { + return "AArch64PostLegalizerLowering"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() + : MachineFunctionPass(ID) { + initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); +} + +bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + 
MachineFunctionProperties::Property::Legalized) && + "Expected a legalized function?"); + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); +} + +char AArch64PostLegalizerLowering::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostLegalizerLowering() { + return new AArch64PostLegalizerLowering(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp index 2f882ecb1f..00436b5924 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -1,187 +1,187 @@ -//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass does post-instruction-selection optimizations in the GlobalISel -// pipeline, before the rest of codegen runs. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-post-select-optimize" - -using namespace llvm; - -namespace { -class AArch64PostSelectOptimize : public MachineFunctionPass { -public: - static char ID; - - AArch64PostSelectOptimize(); - - StringRef getPassName() const override { - return "AArch64 Post Select Optimizer"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - -private: - bool optimizeNZCVDefs(MachineBasicBlock &MBB); -}; -} // end anonymous namespace - -void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostSelectOptimize::AArch64PostSelectOptimize() - : MachineFunctionPass(ID) { - initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); -} - -unsigned getNonFlagSettingVariant(unsigned Opc) { - switch (Opc) { - default: - return 0; - case AArch64::SUBSXrr: - return AArch64::SUBXrr; - case AArch64::SUBSWrr: - return AArch64::SUBWrr; - case AArch64::SUBSXrs: - return AArch64::SUBXrs; - case AArch64::SUBSXri: - return AArch64::SUBXri; - case AArch64::SUBSWri: - return AArch64::SUBWri; - } -} - -bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { - // Consider the following code: - // FCMPSrr %0, %1, 
implicit-def $nzcv - // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv - // FCMPSrr %0, %1, implicit-def $nzcv - // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // This kind of code where we have 2 FCMPs each feeding a CSEL can happen - // when we have a single IR fcmp being used by two selects. During selection, - // to ensure that there can be no clobbering of nzcv between the fcmp and the - // csel, we have to generate an fcmp immediately before each csel is - // selected. - // However, often we can essentially CSE these together later in MachineCSE. - // This doesn't work though if there are unrelated flag-setting instructions - // in between the two FCMPs. In this case, the SUBS defines NZCV - // but it doesn't have any users, being overwritten by the second FCMP. - // - // Our solution here is to try to convert flag setting operations between - // a interval of identical FCMPs, so that CSE will be able to eliminate one. - bool Changed = false; - const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - - // The first step is to find the first and last FCMPs. If we have found - // at least two, then set the limit of the bottom-up walk to the first FCMP - // found since we're only interested in dealing with instructions between - // them. - MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; - for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { - if (MI.getOpcode() == AArch64::FCMPSrr || - MI.getOpcode() == AArch64::FCMPDrr) { - if (!FirstCmp) - FirstCmp = &MI; - else - LastCmp = &MI; - } - } - - // In addition to converting flag-setting ops in fcmp ranges into non-flag - // setting ops, across the whole basic block we also detect when nzcv - // implicit-defs are dead, and mark them as dead. Peephole optimizations need - // this information later. - - LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); - LRU.addLiveOuts(MBB); - bool NZCVDead = LRU.available(AArch64::NZCV); - bool InsideCmpRange = false; - for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { - LRU.stepBackward(II); - - if (LastCmp) { // There's a range present in this block. - // If we're inside an fcmp range, look for begin instruction. - if (InsideCmpRange && &II == FirstCmp) - InsideCmpRange = false; - else if (&II == LastCmp) - InsideCmpRange = true; - } - - // Did this instruction define NZCV? - bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); - if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { - // If we have a def and NZCV is dead, then we may convert this op. - unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); - int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); - if (DeadNZCVIdx != -1) { - // If we're inside an fcmp range, then convert flag setting ops. - if (InsideCmpRange && NewOpc) { - LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " - "op in fcmp range: " - << II); - II.setDesc(TII->get(NewOpc)); - II.RemoveOperand(DeadNZCVIdx); - Changed |= true; - } else { - // Otherwise, we just set the nzcv imp-def operand to be dead, so the - // peephole optimizations can optimize them further. 
- II.getOperand(DeadNZCVIdx).setIsDead(); - } - } - } - - NZCVDead = NZCVDeadAtCurrInstr; - } - return Changed; -} - -bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Selected) && - "Expected a selected MF"); - - bool Changed = false; - for (auto &BB : MF) - Changed |= optimizeNZCVDefs(BB); - return true; -} - -char AArch64PostSelectOptimize::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", - false, false) -INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostSelectOptimize() { - return new AArch64PostSelectOptimize(); -} -} // end namespace llvm +//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does post-instruction-selection optimizations in the GlobalISel +// pipeline, before the rest of codegen runs. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-post-select-optimize" + +using namespace llvm; + +namespace { +class AArch64PostSelectOptimize : public MachineFunctionPass { +public: + static char ID; + + AArch64PostSelectOptimize(); + + StringRef getPassName() const override { + return "AArch64 Post Select Optimizer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool optimizeNZCVDefs(MachineBasicBlock &MBB); +}; +} // end anonymous namespace + +void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostSelectOptimize::AArch64PostSelectOptimize() + : MachineFunctionPass(ID) { + initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); +} + +unsigned getNonFlagSettingVariant(unsigned Opc) { + switch (Opc) { + default: + return 0; + case AArch64::SUBSXrr: + return AArch64::SUBXrr; + case AArch64::SUBSWrr: + return AArch64::SUBWrr; + case AArch64::SUBSXrs: + return AArch64::SUBXrs; + case AArch64::SUBSXri: + return AArch64::SUBXri; + case AArch64::SUBSWri: + return AArch64::SUBWri; + } +} + +bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { + // Consider the following code: + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // This kind of code where we have 2 FCMPs each feeding a CSEL 
can happen + // when we have a single IR fcmp being used by two selects. During selection, + // to ensure that there can be no clobbering of nzcv between the fcmp and the + // csel, we have to generate an fcmp immediately before each csel is + // selected. + // However, often we can essentially CSE these together later in MachineCSE. + // This doesn't work though if there are unrelated flag-setting instructions + // in between the two FCMPs. In this case, the SUBS defines NZCV + // but it doesn't have any users, being overwritten by the second FCMP. + // + // Our solution here is to try to convert flag setting operations between + // a interval of identical FCMPs, so that CSE will be able to eliminate one. + bool Changed = false; + const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + + // The first step is to find the first and last FCMPs. If we have found + // at least two, then set the limit of the bottom-up walk to the first FCMP + // found since we're only interested in dealing with instructions between + // them. + MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; + for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { + if (MI.getOpcode() == AArch64::FCMPSrr || + MI.getOpcode() == AArch64::FCMPDrr) { + if (!FirstCmp) + FirstCmp = &MI; + else + LastCmp = &MI; + } + } + + // In addition to converting flag-setting ops in fcmp ranges into non-flag + // setting ops, across the whole basic block we also detect when nzcv + // implicit-defs are dead, and mark them as dead. Peephole optimizations need + // this information later. + + LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); + LRU.addLiveOuts(MBB); + bool NZCVDead = LRU.available(AArch64::NZCV); + bool InsideCmpRange = false; + for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + LRU.stepBackward(II); + + if (LastCmp) { // There's a range present in this block. + // If we're inside an fcmp range, look for begin instruction. + if (InsideCmpRange && &II == FirstCmp) + InsideCmpRange = false; + else if (&II == LastCmp) + InsideCmpRange = true; + } + + // Did this instruction define NZCV? + bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); + if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { + // If we have a def and NZCV is dead, then we may convert this op. + unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); + int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); + if (DeadNZCVIdx != -1) { + // If we're inside an fcmp range, then convert flag setting ops. + if (InsideCmpRange && NewOpc) { + LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " + "op in fcmp range: " + << II); + II.setDesc(TII->get(NewOpc)); + II.RemoveOperand(DeadNZCVIdx); + Changed |= true; + } else { + // Otherwise, we just set the nzcv imp-def operand to be dead, so the + // peephole optimizations can optimize them further. 
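The comment above describes the whole algorithm: walk the block bottom-up with flag liveness, and between two identical FCMPs convert flag-setting instructions whose NZCV def is dead into their non-flag-setting twins so MachineCSE can later merge the compares. Below is a toy model of that walk over a list of abstract instructions; it uses no LLVM types, and the instruction records and their fields are invented purely for the illustration.

// Toy model of the bottom-up NZCV walk described above.
#include <cstdio>
#include <string>
#include <vector>

struct Inst {
  std::string Name;
  bool DefsNZCV;  // writes the flags
  bool ReadsNZCV; // consumes the flags
  bool IsFCmp;    // one of the identical compares
  bool HasTwin;   // has a non-flag-setting variant (e.g. SUBS -> SUB)
};

int main() {
  std::vector<Inst> Block = {
      {"FCMPSrr",   true,  false, true,  false},
      {"CSELWr #1", false, true,  false, false},
      {"SUBSWrr",   true,  false, false, true},  // its flags are never read
      {"FCMPSrr",   true,  false, true,  false},
      {"CSELWr #2", false, true,  false, false},
  };

  // Find the first and last compare, as the pass does.
  int First = -1, Last = -1;
  for (int i = 0; i < (int)Block.size(); ++i)
    if (Block[i].IsFCmp) {
      if (First < 0) First = i; else Last = i;
    }

  // Bottom-up: a read makes NZCV live, a def makes it dead again. A def of
  // dead flags inside the compare range can use the non-flag-setting twin.
  bool NZCVLive = false;
  for (int i = (int)Block.size() - 1; i >= 0; --i) {
    Inst &I = Block[i];
    bool InsideRange = Last >= 0 && i > First && i < Last;
    if (I.DefsNZCV && !NZCVLive && I.HasTwin && InsideRange)
      std::printf("convert %s to its non-flag-setting form\n", I.Name.c_str());
    if (I.DefsNZCV)
      NZCVLive = false;
    if (I.ReadsNZCV)
      NZCVLive = true;
  }
  return 0;
}

Run on the example from the comment, this reports exactly one conversion, the SUBSWrr sitting between the two FCMPs.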
+ II.getOperand(DeadNZCVIdx).setIsDead(); + } + } + } + + NZCVDead = NZCVDeadAtCurrInstr; + } + return Changed; +} + +bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected) && + "Expected a selected MF"); + + bool Changed = false; + for (auto &BB : MF) + Changed |= optimizeNZCVDefs(BB); + return true; +} + +char AArch64PostSelectOptimize::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", + false, false) +INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostSelectOptimize() { + return new AArch64PostSelectOptimize(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 5f9b64e274..2686f6dc46 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -104,16 +104,16 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other - // heuristics decide. - unsigned MaxLen = EnableOpt ? 0 : 32; - // Try to inline memcpy type calls if optimizations are enabled. - return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; - } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other + // heuristics decide. + unsigned MaxLen = EnableOpt ? 0 : 32; + // Try to inline memcpy type calls if optimizations are enabled. + return !EnableMinSize ? 
Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; } + } return false; } @@ -188,7 +188,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, namespace llvm { -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { return new AArch64PreLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index c76c43389b..e26fe60d93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -13,7 +13,7 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -466,10 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping( getValueMapping(RBIdx, Size), NumOperands); } -bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { unsigned Op = MI.getOpcode(); // Do we have an explicit floating point instruction? @@ -481,30 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, if (Op != TargetOpcode::COPY && !MI.isPHI()) return false; - // Check if we already know the register bank. - auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); - if (RB == &AArch64::FPRRegBank) - return true; - if (RB == &AArch64::GPRRegBank) - return false; - - // We don't know anything. - // - // If we have a phi, we may be able to infer that it will be assigned a FPR - // based off of its inputs. - if (!MI.isPHI() || Depth > MaxFPRSearchDepth) - return false; - - return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { - return Op.isReg() && - onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); - }); + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &AArch64::FPRRegBank) + return true; + if (RB == &AArch64::GPRRegBank) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned a FPR + // based off of its inputs. 
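The hasFPConstraints change above adds a depth-bounded look-through: if the destination's register bank is still unknown and the instruction is a PHI, the query recurses into the PHI's inputs up to MaxFPRSearchDepth and prefers FPR as soon as one input is FP-only. The following toy restates that bounded recursion over a hypothetical def graph; the Node type and its fields are invented for the sketch and do not model real register banks.

// Toy model of the depth-bounded FP-constraint inference above.
#include <cstdio>
#include <vector>

struct Node {
  bool IsExplicitFPOp;             // e.g. an FP arithmetic def: definitely FPR
  bool IsPhi;                      // bank decided by the inputs
  std::vector<const Node *> Inputs;
};

static const unsigned MaxFPRSearchDepth = 2; // same bound as the real code

static bool onlyDefinesFPSketch(const Node &N, unsigned Depth = 0) {
  if (N.IsExplicitFPOp)
    return true;
  if (!N.IsPhi || Depth > MaxFPRSearchDepth)
    return false; // unknown: stay conservative
  for (const Node *In : N.Inputs)
    if (In && onlyDefinesFPSketch(*In, Depth + 1))
      return true; // one FP input is enough to prefer FPR
  return false;
}

int main() {
  Node FAdd{true, false, {}};            // FP-producing def
  Node Load{false, false, {}};           // nothing known from the opcode alone
  Node Phi{false, true, {&FAdd, &Load}}; // PHI of the two
  std::printf("phi prefers FPR: %d\n", onlyDefinesFPSketch(Phi)); // 1
  return 0;
}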
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); } bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: @@ -513,13 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } -bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: case TargetOpcode::G_SITOFP: @@ -530,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } const RegisterBankInfo::InstructionMapping & @@ -680,18 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case TargetOpcode::G_SITOFP: - case TargetOpcode::G_UITOFP: { + case TargetOpcode::G_UITOFP: { if (MRI.getType(MI.getOperand(0).getReg()).isVector()) break; - // Integer to FP conversions don't necessarily happen between GPR -> FPR - // regbanks. They can also be done within an FPR register. - Register SrcReg = MI.getOperand(1).getReg(); - if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; + // Integer to FP conversions don't necessarily happen between GPR -> FPR + // regbanks. They can also be done within an FPR register. + Register SrcReg = MI.getOperand(1).getReg(); + if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; break; - } + } case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -729,8 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // assume this was a floating point load in the IR. // If it was not, we would have had a bitcast before // reaching that instruction. - // Int->FP conversion operations are also captured in onlyDefinesFP(). - if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { + // Int->FP conversion operations are also captured in onlyDefinesFP(). + if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { OpRegBankIdx[0] = PMI_FirstFPR; break; } @@ -853,7 +853,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_BUILD_VECTOR: { + case TargetOpcode::G_BUILD_VECTOR: { // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. if (OpRegBankIdx[1] != PMI_FirstGPR) @@ -864,17 +864,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Get the instruction that defined the source operand reg, and check if // it's a floating point operation. 
Or, if it's a type like s16 which - // doesn't have a exact size gpr register class. The exception is if the - // build_vector has all constant operands, which may be better to leave as - // gpr without copies, so it can be matched in imported patterns. + // doesn't have a exact size gpr register class. The exception is if the + // build_vector has all constant operands, which may be better to leave as + // gpr without copies, so it can be matched in imported patterns. MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned DefOpc = DefMI->getOpcode(); const LLT SrcTy = MRI.getType(VReg); - if (all_of(MI.operands(), [&](const MachineOperand &Op) { - return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == - TargetOpcode::G_CONSTANT; - })) - break; + if (all_of(MI.operands(), [&](const MachineOperand &Op) { + return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == + TargetOpcode::G_CONSTANT; + })) + break; if (isPreISelGenericFloatingPointOpcode(DefOpc) || SrcTy.getSizeInBits() < 32) { // Have a floating point op. @@ -885,30 +885,30 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_FMUL: - case TargetOpcode::G_VECREDUCE_FMAX: - case TargetOpcode::G_VECREDUCE_FMIN: - case TargetOpcode::G_VECREDUCE_ADD: - case TargetOpcode::G_VECREDUCE_MUL: - case TargetOpcode::G_VECREDUCE_AND: - case TargetOpcode::G_VECREDUCE_OR: - case TargetOpcode::G_VECREDUCE_XOR: - case TargetOpcode::G_VECREDUCE_SMAX: - case TargetOpcode::G_VECREDUCE_SMIN: - case TargetOpcode::G_VECREDUCE_UMAX: - case TargetOpcode::G_VECREDUCE_UMIN: - // Reductions produce a scalar value from a vector, the scalar should be on - // FPR bank. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - break; - case TargetOpcode::G_VECREDUCE_SEQ_FADD: - case TargetOpcode::G_VECREDUCE_SEQ_FMUL: - // These reductions also take a scalar accumulator input. - // Assign them FPR for now. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; - break; - } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: + // Reductions produce a scalar value from a vector, the scalar should be on + // FPR bank. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: + // These reductions also take a scalar accumulator input. + // Assign them FPR for now. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; + break; + } // Finally construct the computed mapping. 
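The G_BUILD_VECTOR hunk above keeps an all-constant build vector on GPR (so the imported patterns can still match it) and only moves the sources to FPR when the first source is produced by a floating-point op or has a scalar type with no exact-size GPR class. A compact standalone restatement of that decision follows; the function name and the SourceKind enum are invented for the sketch.

// Standalone restatement of the G_BUILD_VECTOR bank decision above.
#include <cstdio>
#include <vector>

enum class SourceKind { Constant, FPOp, IntOp };

// Returns true if the build_vector's scalar sources should be put on FPR.
static bool buildVectorWantsFPR(const std::vector<SourceKind> &Srcs,
                                unsigned ScalarSizeInBits,
                                bool FirstSrcAlreadyFPR) {
  if (FirstSrcAlreadyFPR)
    return true; // preserve an existing FPR assignment
  // All-constant vectors stay on GPR so imported patterns can match them.
  bool AllConstant = true;
  for (SourceKind K : Srcs)
    if (K != SourceKind::Constant)
      AllConstant = false;
  if (AllConstant)
    return false;
  // FP-producing sources, or scalars with no exact-size GPR class (e.g. s16),
  // push the whole build_vector onto FPR.
  return Srcs.front() == SourceKind::FPOp || ScalarSizeInBits < 32;
}

int main() {
  std::printf("%d\n", buildVectorWantsFPR({SourceKind::Constant,
                                           SourceKind::Constant}, 32, false)); // 0
  std::printf("%d\n", buildVectorWantsFPR({SourceKind::FPOp,
                                           SourceKind::IntOp}, 32, false));    // 1
  return 0;
}

The vector-reduction cases added right after follow the same idea in a simpler form: a reduction always yields its scalar on the FPR bank, and the sequential FADD/FMUL variants also keep their scalar accumulator there.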
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 019017bc3e..c8cfe53299 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -114,20 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { const InstructionMapping & getSameKindOfOperandsMapping(const MachineInstr &MI) const; - /// Maximum recursion depth for hasFPConstraints. - const unsigned MaxFPRSearchDepth = 2; - - /// \returns true if \p MI only uses and defines FPRs. + /// Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only uses FPRs. + /// \returns true if \p MI only uses FPRs. bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only defines FPRs. + /// \returns true if \p MI only defines FPRs. bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); |